docs/examples/usecases/github_issue_analysis.ipynb
To use the GitHub repository issue loader, you need to set your GitHub token in the environment (the `GITHUB_TOKEN` variable).
See GitHub's documentation on personal access tokens for how to get a GitHub token.
See LlamaHub for more details about the loader.
# Install the packages this notebook imports from: the GitHub readers,
# the OpenAI LLM integration, and the OpenAI pydantic-program integration.
%pip install llama-index-readers-github
%pip install llama-index-llms-openai
%pip install llama-index-program-openai
import os

# Expose the GitHub token through the environment, which is where the issue
# loader is expected to find it. Replace the placeholder with a real token.
os.environ.update({"GITHUB_TOKEN": "<your github token>"})
import os
from llama_index.readers.github import (
GitHubRepositoryIssuesReader,
GitHubIssuesClient,
)
# Client for the GitHub issues API. It is built without arguments here, so it
# presumably picks the token up from the GITHUB_TOKEN environment variable.
github_client = GitHubIssuesClient()

# Reader bound to the jerryjliu/llama_index repository, with verbose output on.
loader = GitHubRepositoryIssuesReader(
    github_client,
    repo="llama_index",
    owner="jerryjliu",
    verbose=True,
)

# Load the repository's issues; each entry exposes `.text` and `.metadata`.
docs = loader.load_data()
Quick inspection
# Peek at a single loaded issue: first its text, then its metadata.
# Index 10 looks like an arbitrary sample; this assumes at least 11 issues were loaded.
docs[10].text
docs[10].metadata
# Load IPython's autoreload extension and switch it to mode 2, so edited
# modules are re-imported without restarting the kernel.
%load_ext autoreload
%autoreload 2
from pydantic import BaseModel
from typing import List
from tqdm.asyncio import asyncio
from llama_index.program.openai import OpenAIPydanticProgram
from llama_index.llms.openai import OpenAI
from llama_index.core.async_utils import batch_gather
# Prompt for the tag-extraction program; `{ticket}` is filled in once per issue.
prompt_template_str = (
    "Here is a Github Issue ticket.\n"
    "{ticket}\n"
    "Please extract central themes and output a list of tags."
)
class TagList(BaseModel):
    """A list of tags corresponding to central themes of an issue."""

    # NOTE(review): the docstring above is kept verbatim on purpose. Pydantic
    # includes class docstrings in the generated schema, so it presumably
    # reaches the LLM as part of the output description; confirm before rewording.

    # Theme labels extracted from a single issue.
    tags: List[str]
# Structured-output program: renders `prompt_template_str` for a ticket and
# is configured to produce its answer as a `TagList`.
program = OpenAIPydanticProgram.from_defaults(
    output_cls=TagList,
    prompt_template_str=prompt_template_str,
)
tasks = [program.acall(ticket=doc) for doc in docs]
output = await batch_gather(tasks, batch_size=10, verbose=True)
import pickle
with open("github_issue_analysis_data.pkl", "wb") as f:
pickle.dump(tag_lists, f)
# Restore the cached tag lists. The pickle file is the one this notebook wrote
# itself; pickle must not be used to load files from untrusted sources.
with open("github_issue_analysis_data.pkl", "rb") as f:
    tag_lists = pickle.load(f)

print(f"Loaded tag lists for {len(tag_lists)} tickets")
Build prompt
# Render every ticket's tag list as text, with a blank line between tickets.
tag_lists_str = "\n\n".join(map(str, tag_lists))

# Summary prompt: the rendered tag lists are substituted for `{tag_lists_str}`.
prompt = (
    "\n"
    "Here is a list of central themes (in the form of tags) extracted from a list of Github Issue tickets.\n"
    "Tags for each ticket is separated by 2 newlines.\n"
    "{tag_lists_str}\n"
    "Please summarize the key takeaways and what we should prioritize to fix.\n"
).format(tag_lists_str=tag_lists_str)
Summarize with GPT-4
from llama_index.llms.openai import OpenAI
# Ask GPT-4 for the summary as a stream and print each incremental piece
# without adding newlines, so the text appears as it is generated.
response = OpenAI(model="gpt-4").stream_complete(prompt)
for chunk in response:
    print(chunk.delta, end="")