"""## Import necessary libraries""" import os import shutil import json from langchain.document_loaders import PyPDFLoader from langchain.document_loaders import PyPDFDirectoryLoader from langchain.llms import OpenAI from langchain.prompts import PromptTemplate from langchain.chains import LLMChain from langchain.output_parsers import PydanticOutputParser from pydantic import BaseModel, Field from langchain.document_loaders import YoutubeLoader from langchain.document_loaders import WebBaseLoader from langchain.text_splitter import CharacterTextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.embeddings.openai import OpenAIEmbeddings from langchain.vectorstores import Chroma from langchain.chat_models import ChatOpenAI from langchain.chains import RetrievalQA #from google.colab import drive from google.oauth2 import service_account from google.cloud import translate_v2 as translate import gradio as gr """## Access KEY""" #ACCESS_KEY = os.environ.get("ACCESS_KEY") service_account_info = json.loads(os.environ.get("SERVICE_ACCOUNT_FILE")) credentials = service_account.Credentials.from_service_account_info(service_account_info) """ ## Load PDF """ class LoadPdf: def __init__(self, pdf_file): if not self.is_pdf_file(pdf_file): raise gr.Error("Invalid file extension. Please load a PDF file") self.pdf_file = pdf_file def is_pdf_file(self, file_path): _, file_extension = os.path.splitext(file_path) return file_extension.lower() == ".pdf" def read_file(self): loader = PyPDFLoader(self.pdf_file) data = loader.load() return data """## Request OpenAI""" class QuestionAnswer: def __init__(self, data, question, user_key): self.data = data self.question = question self.key = user_key def make_qa(self): #Splitter text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) splits = text_splitter.split_documents(self.data) #Persist dir persist_directory = 'files/chroma/' #EMbedings embedding = OpenAIEmbeddings(openai_api_key=self.key) retriever = Chroma.from_documents(documents=splits, embedding=embedding, persist_directory=persist_directory).as_retriever() # initialize the LLM llm = ChatOpenAI(temperature=0.2, model="gpt-3.5-turbo-16k", openai_api_key=self.key) question_answer = RetrievalQA.from_chain_type(llm=llm, retriever=retriever) make_question = f'{self.question}' return question_answer.run(make_question) """## Translation""" class TranslateOutput: def __init__(self, credentials): self.credentials = credentials def list_languages(self): client = translate.client.Client(credentials=self.credentials) languages = client.get_languages() language_names = [language['name'] for language in languages] return language_names def all_languages(self): client = translate.client.Client(credentials=self.credentials) languages = client.get_languages() return languages def translate_text(self, text, target_language): client = translate.client.Client(target_language=target_language, credentials=self.credentials) if isinstance(text, bytes): text = text.decode("utf-8") result = client.translate(text, target_language=target_language) return result["translatedText"] """## Run QA """ def run_qa(files,checkboxes,question,language,user_key): #secret_key = os.environ.get("SECRET_KEY") if user_key is None: return 'Introduza OpenAI API KEY' full_filenames = [file.name for file in files] available_files = [os.path.basename(path) for path in full_filenames] chosen_files = checkboxes # Filter files that are both available and chosen loadable_files = [file for file in available_files if file in chosen_files] # debug messages print(f"=> Available Files: {str(available_files)}") print(f"=> Chosen Files: {str(chosen_files)}") print(f"=> Question for Files: {str(question)}") print(f"=> Language to use: {str(language)}") # clear data data='' # Load files for file in loadable_files: print(f"=> Loading chosen file: {str(file)}") pdf_loader = LoadPdf("pdfs/"+file) data = pdf_loader.read_file() # Run the model qa = QuestionAnswer(data, question, user_key) answer_open_ai = qa.make_qa() # Translate output language_selected = language translate_output = TranslateOutput(credentials) for i in translate_output.all_languages(): if i['name'] == language_selected: iso_code = i['language'] break print(f"=> Answer OpenAI: {answer_open_ai}") print(f"=> Target Language IsoCode: {iso_code}") answer = translate_output.translate_text(answer_open_ai, target_language=iso_code) print(f"=> Translated Answer OpenAI: {answer}") return answer # Define a function to be called when files are uploaded def on_files_upload(files): # save files to files dir if not os.path.exists("pdfs"): os.makedirs("pdfs", exist_ok=True) # print(f"The directory 'pdfs' was created!"); files_dir = "pdfs" for fileobj in files: path = files_dir + "/" + os.path.basename(fileobj) shutil.copyfile(fileobj.name, path) # checkbox group update full_filenames = [file.name for file in files] filenames = [os.path.basename(path) for path in full_filenames] return(gr.CheckboxGroup(choices=filenames)) # Define a function to be called when files are cleared def on_files_cleared(): if os.path.exists("pdfs"): shutil.rmtree("pdfs") # print(f"The directory was removed!"); return(gr.CheckboxGroup(choices=[])) # Define the Gradio interface title = "Question/Answer over Documents" subtitle = "OpenAI GPT 3.5 Turbo LLM assisted Question/Answer over multiple PDF context documents" authors = "Hugo Cavalaria " custom_layout = "

{}

{}

{}

".format(title,subtitle,authors) # Get the list of languages available translate_output = TranslateOutput(credentials) language_names = [i for i in translate_output.list_languages()] # Gradio Interface with gr.Blocks() as interface: with gr.Row(): with gr.Column(scale=2): gr.HTML(custom_layout) with gr.Row(): with gr.Column(scale=1): upload_pdfs = gr.Files(label="Upload multiple PDF files.", interactive=True, file_types=['.pdf'], container=True) checkbox_group = gr.CheckboxGroup(label="Select the files to question.", choices=[], interactive=True) question_text = gr.Textbox(label="Question:") answer_language = gr.Dropdown(label="Answer translation to:", choices=language_names, value="Portuguese") secret_key = gr.Textbox(label="OpenAI API Key:") with gr.Column(scale=1): output_status = gr.Textbox(label="Answer:") btn = gr.Button("Ask") btn.click(fn=run_qa, inputs=[upload_pdfs,checkbox_group,question_text,answer_language,secret_key], outputs=[output_status]) upload_pdfs.upload(fn=on_files_upload, inputs=[upload_pdfs], outputs=[checkbox_group], show_progress="full") upload_pdfs.clear(fn=on_files_cleared, inputs=None, outputs=[checkbox_group]) """## Launch Interface""" # launch interface if __name__ == "__main__": interface.launch(share=False, debug=True)