packages/graphrag-input/example_notebooks/input_example.ipynb
# Copyright (c) 2026 Microsoft Corporation.
# Licensed under the MIT License.
Basic usage with the factory:
from graphrag_input import InputConfig, InputType, create_input_reader
from graphrag_storage import StorageConfig, create_storage
config = InputConfig(
type=InputType.Csv,
text_column="content",
title_column="title",
)
storage = create_storage(StorageConfig(base_dir="./input"))
reader = create_input_reader(config, storage)
documents = await reader.read_files()
from graphrag_input import InputConfig, InputType, create_input_reader
from graphrag_storage import StorageConfig, create_storage
config = InputConfig(type=InputType.MarkItDown, file_pattern=".*\\.pdf$")
storage = create_storage(StorageConfig(base_dir="./input"))
reader = create_input_reader(config, storage)
documents = await reader.read_files()
Note that when specifying column names for data extraction, we can handle nested objects (e.g., in JSON) with dot notation:
from graphrag_input import get_property
# Nested mapping standing in for a parsed JSON record.
data = {
    "user": {
        "profile": {"name": "Alice"},
    },
}

# Each dot-separated segment of the path names one level of nesting.
name = get_property(data, "user.profile.name")
print(name)  # expected output: Alice