Back to Graphrag

Copyright (c) 2026 Microsoft Corporation.

packages/graphrag-input/example_notebooks/input_example.ipynb

3.0.9 · 1.2 KB
Original Source
python
# Copyright (c) 2026 Microsoft Corporation.
# Licensed under the MIT License.

MarkItDown support example

Basic usage with the factory (a CSV reader first, then a MarkItDown reader for PDF files):

python
from graphrag_input import InputConfig, InputType, create_input_reader
from graphrag_storage import StorageConfig, create_storage

config = InputConfig(
    type=InputType.Csv,
    text_column="content",
    title_column="title",
)
storage = create_storage(StorageConfig(base_dir="./input"))
reader = create_input_reader(config, storage)
documents = await reader.read_files()
python
from graphrag_input import InputConfig, InputType, create_input_reader
from graphrag_storage import StorageConfig, create_storage

config = InputConfig(type=InputType.MarkItDown, file_pattern=".*\\.pdf$")
storage = create_storage(StorageConfig(base_dir="./input"))
reader = create_input_reader(config, storage)
documents = await reader.read_files()

Note that when specifying column names for data extraction, we can handle nested objects (e.g., in JSON) with dot notation:

python
from graphrag_input import get_property

# Nested mapping: the value we want sits three levels down.
data = {
    "user": {
        "profile": {"name": "Alice"},
    },
}

# Each dot-separated segment of the path names one level of nesting.
name = get_property(data, "user.profile.name")  # Returns "Alice"

print(name)