embedchain/docs/components/data-sources/github.mdx
import os

from embedchain.loaders.github import GithubLoader
from embedchain.pipeline import Pipeline as App

# Step 1: build the GitHub loader. The "token" entry is a placeholder;
# replace it with your own GitHub access token.
github_config = {"token": "ghp_xxxx"}
loader = GithubLoader(config=github_config)

# Step 2: set the OpenAI API key (placeholder value) and create the app.
os.environ["OPENAI_API_KEY"] = "sk-xxxx"
app = App()

# Step 3: add a repository through a GitHub query, passing the loader
# explicitly, then ask a question about the loaded data.
app.add("repo:embedchain/embedchain type:repo", data_type="github", loader=loader)
response = app.query("What is Embedchain?")
# Answer: Embedchain is a Data Platform for Large Language Models (LLMs). It allows users to seamlessly load, index, retrieve, and sync unstructured data in order to build dynamic, LLM-powered applications. There is also a JavaScript implementation called embedchain-js available on GitHub.
The `add` function of the app accepts any valid GitHub query with qualifiers. It only supports loading GitHub code, repositories, issues, and pull requests.
<Note>
You must provide the `type:` and `repo:` qualifiers in the query. The `type:` qualifier can be a combination of `code`, `repo`, `pr`, `issue`, `branch`, and `file`. The `repo:` qualifier must be a valid GitHub repository name.
</Note>
from embedchain.chunkers.common_chunker import CommonChunker
from embedchain.config.add_config import ChunkerConfig

# Custom chunking: 2000-unit chunks with no overlap, where length is
# measured with the built-in `len`.
github_chunker_config = ChunkerConfig(chunk_size=2000, chunk_overlap=0, length_function=len)
github_chunker = CommonChunker(config=github_chunker_config)

# Any GitHub query with `repo:` and `type:` qualifiers; this one matches the
# query used in the basic example.
load_query = "repo:embedchain/embedchain type:repo"

# `app` and `loader` are the App and GithubLoader instances created in the
# basic example; the chunker is passed alongside the loader.
app.add(load_query, data_type="github", loader=loader, chunker=github_chunker)