llama-index-integrations/readers/llama-index-readers-rayyan/examples/rayyan-loader.ipynb
pip install -r notebook-requirements.txt
Make sure you have a file named .env in the same directory as this notebook, with the following contents:
OPENAI_API_KEY=<your key here>
OPENAI_ORGANIZATION=<your organization here>
The organization is optional, but if you are part of multiple organizations, you can specify which one you want to use. Otherwise, the default organization will be used.
Optionally, to enable NewRelic monitoring, add the following to your .env file:
NEW_RELIC_APP_NAME=<your app name here>
NEW_RELIC_LICENSE_KEY=<your key here>
import os
import sys
import logging

from dotenv import load_dotenv

# Send log records to stderr so they show up in the notebook's output cells.
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
logger = logging.getLogger(__name__)

# Take environment variables from .env (OPENAI_API_KEY and, optionally,
# the NEW_RELIC_* monitoring settings described above).
load_dotenv()

# Lazy %-formatting: the argument is only rendered if DEBUG logging is
# actually enabled (it is not at the INFO level configured above), unlike
# an f-string, which is always evaluated before the call.
logger.debug("NewRelic application: %s", os.getenv("NEW_RELIC_APP_NAME"))
Make sure you have a Rayyan credentials file named rayyan-creds.json in the same directory as this notebook.
Check the Rayyan SDK for more details.
import os
from time import perf_counter

from nr_openai_observability import monitor
from llama_index import VectorStoreIndex, download_loader

# Enable NewRelic monitoring only when both required settings are present.
if os.getenv("NEW_RELIC_APP_NAME") and os.getenv("NEW_RELIC_LICENSE_KEY"):
    monitor.initialization(application_name=os.getenv("NEW_RELIC_APP_NAME"))

# Uncomment to download the loader from another repository
# RayyanReader = download_loader("RayyanReader", loader_hub_url="https://raw.githubusercontent.com/rayyansys/llama-hub/rayyan-loader/llama_hub")
RayyanReader = download_loader("RayyanReader")

# Credentials file documented above; see the Rayyan SDK for its format.
loader = RayyanReader(credentials_path="rayyan-creds.json")
# documents = loader.load_data(review_id=746345, filters={"search[value]": "outcome"})
documents = loader.load_data(review_id=746345)

logger.info("Indexing articles...")
# perf_counter() is a monotonic clock intended for measuring elapsed time;
# time.time() is wall-clock and can jump (NTP adjustment, DST) mid-run.
t1 = perf_counter()
review_index = VectorStoreIndex.from_documents(documents)
t2 = perf_counter()
logger.info("Done indexing articles in %.2f seconds.", t2 - t1)
query_engine = review_index.as_query_engine()

# Questions to ask against the indexed review articles.
prompts = [
    "What are the most used interventions?",
    "What is the most common population?",
    "Are there studies about children?",
    "Do we have any studies about COVID-19?",
    "Are there any multi-center randomized clinical trials?",
]

total = len(prompts)
for number, question in enumerate(prompts, start=1):
    print(f"❓ Query {number}/{total}: {question}")
    print("Waiting for response...")
    answer = query_engine.query(question)
    print(f"🤖 {answer.response}")
    # Each metadata entry is an article the engine considered relevant.
    print("Relevant articles:")
    for article in answer.metadata.values():
        print(f"- [{article['id']}] {article['title']}")
    print()