import uuid

import chromadb
import gradio as gr
from langchain.document_loaders import PDFMinerLoader
from langchain.text_splitter import CharacterTextSplitter
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, T5ForConditionalGeneration

# Generator model: Flan-T5 answers questions from the retrieved context.
model_name = 'google/flan-t5-base'
model = T5ForConditionalGeneration.from_pretrained(model_name, device_map='auto', offload_folder='offload')
tokenizer = AutoTokenizer.from_pretrained(model_name)
print('flan read')

# Embedding model: Sentence-T5 encodes document chunks and queries into vectors.
ST_name = 'sentence-transformers/sentence-t5-base'
st_model = SentenceTransformer(ST_name)
print('sentence read')

# Create the Chroma client and collection once, at module level, so that
# load_document() and get_context() operate on the same collection.
client = chromadb.Client()
collection = client.get_or_create_collection('test_db')


def get_context(query_text):
    """Retrieve the most relevant indexed chunk for the query."""
    query_emb = st_model.encode(query_text)
    query_response = collection.query(query_embeddings=[query_emb.tolist()], n_results=4)
    docs = query_response['documents'][0]
    if not docs:
        # Nothing has been indexed yet (no PDF uploaded).
        return ''
    # Keep only the top-ranked chunk so the prompt stays within Flan-T5's input limit.
    context = docs[0]
    context = context.replace('\n', ' ').replace('  ', ' ')
    return context


def local_query(query, context):
    """Answer the query with Flan-T5, conditioned on the retrieved context."""
    t5query = """Using the available context, please answer the question.
    If you aren't sure, please say "I don't know".
    Context: {}
    Question: {}
    """.format(context, query)
    inputs = tokenizer(t5query, return_tensors='pt', truncation=True).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=20)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]


def run_query(query):
    context = get_context(query)
    result = local_query(query, context)
    return result


def load_document(pdf_filename):
    """Load a PDF, split it into chunks, embed the chunks, and index them in Chroma."""
    loader = PDFMinerLoader(pdf_filename)
    doc = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(doc)
    texts = [i.page_content for i in texts]

    doc_emb = st_model.encode(texts).tolist()
    ids = [str(uuid.uuid1()) for _ in doc_emb]
    collection.add(embeddings=doc_emb, documents=texts, ids=ids)

    return 'Success'


def upload_pdf(file):
    try:
        if file is None:
            return 'No file uploaded.'
        # Depending on the Gradio version, the File value is either a temp-file
        # object exposing .name or a plain path string.
        file_path = file.name if hasattr(file, 'name') else file
        return load_document(file_path)
    except Exception as e:
        return f'An error occurred: {e}'


def respond(message, chat_history):
    """Answer the user's question and append the exchange to the chat history."""
    answer = run_query(message)
    chat_history.append((message, answer))
    return '', chat_history


with gr.Blocks() as demo:
    # height= replaces the .style(height=650) call that newer Gradio versions removed.
    chatbot = gr.Chatbot(value=[], elem_id='chatbot', height=650)
    with gr.Row():
        with gr.Column(scale=7):
            txt = gr.Textbox(
                show_label=False,
                placeholder='Ask a question about the uploaded PDF and press Enter',
            )
        with gr.Column(scale=3):
            pdf_file = gr.File(label='PDF file', file_types=['.pdf'])
            index_btn = gr.Button('Index PDF')
            status = gr.Textbox(label='Status', interactive=False)

    index_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
    txt.submit(respond, inputs=[txt, chatbot], outputs=[txt, chatbot])

demo.launch()
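
# --- Optional: headless smoke test (a minimal sketch, not wired into the app) ---
# The filename 'sample.pdf' and the question below are only illustrations, not
# part of the original script; substitute your own PDF and query. Comment out
# demo.launch() above and uncomment these lines to exercise the indexing and
# retrieval pipeline without the Gradio UI.
#
# print(load_document('sample.pdf'))                            # index the PDF into Chroma
# print(run_query('What is the main topic of the document?'))   # retrieve context and generate an answer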