Back to Llama Index

Github Issue Analysis

docs/examples/usecases/github_issue_analysis.ipynb

0.14.212.8 KB
Original Source

Github Issue Analysis

Setup

To use the GitHub repository issue loader, you need to set your GitHub token in the environment.

See here for how to get a github token.
See llama-hub for more details about the loader.

python
%pip install llama-index-readers-github
%pip install llama-index-llms-openai
%pip install llama-index-program-openai
python
import os

# Placeholder only: substitute a real GitHub token before running, and avoid
# saving the real value into the notebook.
os.environ["GITHUB_TOKEN"] = "<your github token>"

Load Github Issue tickets

python
import os

from llama_index.readers.github import (
    GitHubRepositoryIssuesReader,
    GitHubIssuesClient,
)

# The client is created without arguments; per the setup notes, the token is
# expected to come from the GITHUB_TOKEN environment variable.
github_client = GitHubIssuesClient()
# Reader bound to a single repository, identified by owner and repo name.
loader = GitHubRepositoryIssuesReader(
    github_client,
    owner="jerryjliu",
    repo="llama_index",
    verbose=True,
)

# Fetch the repository's issues; the result is indexed and iterated below.
docs = loader.load_data()

Quick inspection

python
docs[10].text
python
docs[10].metadata

Extract themes

python
%load_ext autoreload
%autoreload 2
python
from pydantic import BaseModel
from typing import List
from tqdm.asyncio import asyncio


from llama_index.program.openai import OpenAIPydanticProgram
from llama_index.llms.openai import OpenAI
from llama_index.core.async_utils import batch_gather
python
# Prompt template for per-issue theme extraction; `{ticket}` is filled in on
# each call. The trailing backslashes suppress the newline right after the
# opening quotes and right before the closing quotes.
prompt_template_str = """\
Here is a Github Issue ticket.

{ticket}

Please extract central themes and output a list of tags.\
"""
python
# Structured-output schema for the extraction step: one list of tag strings
# per issue.
# NOTE(review): the class docstring is presumably forwarded to the LLM as the
# schema description, so treat it as prompt text -- confirm before rewording.
class TagList(BaseModel):
    """A list of tags corresponding to central themes of an issue."""

    tags: List[str]
python
# Program built from the prompt template above, with TagList as the requested
# output class for each call.
program = OpenAIPydanticProgram.from_defaults(
    prompt_template_str=prompt_template_str,
    output_cls=TagList,
)
python
# One extraction call per issue. These are presumably un-awaited coroutines;
# they are awaited in batches in the next cell.
# NOTE(review): `doc` itself (not `doc.text`) is formatted into the prompt --
# confirm its string form contains the full issue text.
tasks = [program.acall(ticket=doc) for doc in docs]
python
output = await batch_gather(tasks, batch_size=10, verbose=True)

[Optional] Save/Load Extracted Themes

python
import pickle
python
# Persist the extracted tag lists so the LLM extraction step does not have to
# be re-run in a later session.
with open("github_issue_analysis_data.pkl", mode="wb") as pickle_file:
    pickle.dump(tag_lists, pickle_file)
python
# Restore previously saved tag lists. Unpickling can execute arbitrary code,
# so only load a file this notebook wrote itself.
with open("github_issue_analysis_data.pkl", mode="rb") as pickle_file:
    tag_lists = pickle.load(pickle_file)
    ticket_count = len(tag_lists)
    print(f"Loaded tag lists for {ticket_count} tickets")

Summarize Themes

Build prompt

python
# Render each ticket's tag list as text, with a blank line between tickets
# (the template below tells the model about this separator).
tag_lists_str = "\n\n".join(map(str, tag_lists))

# `prompt` starts as the template and is then rebound to the filled-in text
# that is sent to the LLM.
prompt = """
Here is a list of central themes (in the form of tags) extracted from a list of Github Issue tickets.
Tags for each ticket is separated by 2 newlines.

{tag_lists_str}

Please summarize the key takeaways and what we should prioritize to fix.
"""

prompt = prompt.format(tag_lists_str=tag_lists_str)

Summarize with GPT-4

python
from llama_index.llms.openai import OpenAI

# Request a streamed completion for the summary prompt; the result is iterated
# chunk by chunk in the next cell rather than returned as a single string.
response = OpenAI(model="gpt-4").stream_complete(prompt)
python
# Echo each streamed increment as it arrives; end="" keeps the pieces on one
# continuous line of output.
for chunk in response:
    print(chunk.delta, end="")