import os
import sys
import time
from typing import List, Optional

from openai import OpenAI
|
|
|
class PDFChat:
    """
    Answer questions about a PDF file through an OpenAI assistant.

    Typical use: construct the object, call ``upload_file`` once to upload the
    document and create the assistant, then call ``get_answers`` per question.

    Attributes:
        client (OpenAI): Client for interacting with the OpenAI API.
        assistant_id (Optional[str]): ID of the created assistant. None until
            ``upload_file`` has created one.
    """

    # Run statuses from which a run can no longer reach 'completed'; polling
    # on any of them would otherwise loop forever.
    _FAILED_RUN_STATES = frozenset({"failed", "cancelled", "expired", "incomplete"})

    def __init__(self) -> None:
        """
        Initializes PDFChat with the API key from the environment.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        """
        api_key: Optional[str] = os.getenv("OPENAI_API_KEY")
        # An empty string is as unusable as a missing variable.
        if not api_key:
            raise ValueError("API Key not found in environment variables")
        self.client = OpenAI(api_key=api_key)
        self.assistant_id: Optional[str] = None

    def upload_file(self, filename: str) -> None:
        """
        Uploads a file to the OpenAI API and creates an assistant tied to it.

        On success ``self.assistant_id`` holds the new assistant's ID.

        Args:
            filename (str): The path to the file to be uploaded.
        """
        # The context manager closes the handle even if the upload raises.
        with open(filename, "rb") as pdf:
            uploaded = self.client.files.create(file=pdf, purpose="assistants")

        # NOTE(review): the "retrieval" tool and the `file_ids` argument look
        # like the Assistants v1 beta shape -- confirm against the installed
        # `openai` package version before upgrading it.
        assistant = self.client.beta.assistants.create(
            name="PDF Helper",
            instructions="You are my assistant who can answer questions from the given pdf",
            tools=[{"type": "retrieval"}],
            model="gpt-3.5-turbo-0125",
            file_ids=[uploaded.id],
        )
        self.assistant_id = assistant.id

    def get_answers(
        self,
        question: str,
        *,
        poll_interval: float = 2.0,
        timeout: Optional[float] = None,
    ) -> List[str]:
        """
        Asks a question to the assistant and retrieves the answers.

        A new thread is created for every call, the question is posted to it,
        and the run is polled until it finishes.

        Args:
            question (str): The question to be asked to the assistant.
            poll_interval (float): Seconds to wait between status checks.
            timeout (Optional[float]): Maximum seconds to wait for the run to
                complete. None (the default) waits indefinitely.

        Returns:
            List[str]: The text of each assistant message in the thread, in
            the order the API lists them. Messages without any text part are
            skipped.

        Raises:
            ValueError: If the assistant has not been created yet.
            RuntimeError: If the run ends in a state other than 'completed'.
            TimeoutError: If ``timeout`` elapses before the run completes.
        """
        if self.assistant_id is None:
            raise ValueError("Assistant not created. Please upload a file first.")

        thread = self.client.beta.threads.create()
        self.client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=question,
        )
        run = self.client.beta.threads.runs.create(
            thread_id=thread.id,
            assistant_id=self.assistant_id,
        )

        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            run_status = self.client.beta.threads.runs.retrieve(
                thread_id=thread.id, run_id=run.id
            )
            # Inspect the fresh status before sleeping so that a finished run
            # returns immediately.
            status = run_status.status
            if status == "completed":
                break
            if status in self._FAILED_RUN_STATES:
                detail = getattr(run_status, "last_error", None)
                raise RuntimeError(
                    f"Assistant run ended with status '{status}'"
                    + (f": {detail}" if detail else "")
                )
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Assistant run did not complete within {timeout} seconds"
                )
            time.sleep(poll_interval)

        messages = self.client.beta.threads.messages.list(thread_id=thread.id)
        answers: List[str] = []
        for message in messages.data:
            if message.role != "assistant":
                continue
            text = self._message_text(message)
            if text is not None:
                answers.append(text)
        return answers

    @staticmethod
    def _message_text(message) -> Optional[str]:
        """Return the joined text parts of a message, or None if it has none."""
        values = []
        for part in getattr(message, "content", None) or []:
            # Non-text parts carry no `.text`; skip them rather than raising
            # AttributeError.
            value = getattr(getattr(part, "text", None), "value", None)
            if value is not None:
                values.append(value)
        return "\n".join(values) if values else None
|
|
|
|
|
def main(argv: Optional[List[str]] = None) -> int:
    """
    Run an interactive question/answer session over a PDF file.

    Args:
        argv: Command-line arguments including the program name; defaults to
            ``sys.argv``. ``argv[1]`` is the path of the PDF to upload.

    Returns:
        int: Process exit status -- 0 on a normal exit, 2 when the PDF path
        argument is missing.
    """
    args = sys.argv if argv is None else argv
    # Validate the arguments before any API call so that a missing path gives
    # a usage message rather than an IndexError.
    if len(args) < 2:
        program = args[0] if args else "pdf_chat"
        print(f"Usage: {program} <path-to-pdf>", file=sys.stderr)
        return 2

    chat = PDFChat()
    chat.upload_file(args[1])

    while True:
        try:
            question = input("Enter your question (or type 'exit' to quit): ")
        except (EOFError, KeyboardInterrupt):
            # Treat end-of-input and Ctrl-C as a request to quit.
            print()
            break

        stripped = question.strip()
        if stripped.lower() in ("exit", "quit"):
            break
        if not stripped:
            # Nothing to ask; prompt again instead of sending an empty message.
            continue

        for answer in chat.get_answers(question):
            print(answer)

    return 0


if __name__ == "__main__":
    sys.exit(main())