import os

import gradio as gr
import openai
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient

# Constants
collection_name = "10ks"
# collection_name = "collectiveagreements"

# Load the environment variables with the OpenAI API key
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# The data was vectorized with Ada, so we use the same model to convert
# the query into a vector.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Connect to the local Qdrant instance and wrap the existing collection
# as a LangChain vector store.
dbclient = QdrantClient("localhost", port=6333, grpc_port=6334, prefer_grpc=True)
index = Qdrant(
    client=dbclient,
    collection_name=collection_name,
    embeddings=embeddings,
    vector_name="fragmentvector",
)


def docquery(question):
    """Answer a question against the indexed filings and return the answer
    plus the source fragments it was based on."""
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    # Chain type can be stuff, map_reduce or refine; returning the source
    # documents lets us show references without a second retrieval pass.
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=index.as_retriever(),
        return_source_documents=True,
    )
    output = qa_chain({"query": question})

    # Format the retrieved fragments as Markdown references, bolding the
    # source name when it is present in the document metadata.
    references = ""
    for doc in output["source_documents"]:
        source = doc.metadata.get("source", "")
        references += (
            "**" + source + "** \n" + doc.page_content.replace("\n", " ") + "\n\n"
        )
    return output["result"], references


with gr.Blocks(title="10-K Filings Search") as blocks:
    appname = gr.Markdown(value="# 10-K filings search")
    appdesc = gr.Markdown(
        value="## The tabs below demonstrate different ways to query the data."
    )
    with gr.Tab("Ask a question"):
        tabdesc = gr.Markdown(
            value="### This is a demo of an OpenAI-based question answering system. "
            "Type in a question and the system will return the answer and the "
            "source documents."
        )
        question = gr.Textbox(
            lines=1,
            label="Question: press enter to submit",
            value="Where is Babcock's head office?",
        )
        answer = gr.Markdown(label="Answer")
        references = gr.Markdown(label="References")
        question.submit(docquery, inputs=question, outputs=[answer, references])

blocks.launch(share=True, server_name="0.0.0.0", server_port=8080)
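
# ---------------------------------------------------------------------------
# For reference, a minimal sketch of how a collection like "10ks" could have
# been populated. This is an assumption, not part of this app: the actual
# ingestion script is not included here, and `raw_text` is a hypothetical
# variable holding the document text. The key point is that the fragments
# must be uploaded with the same Ada embeddings and the same named vector
# ("fragmentvector") that the app queries against:
#
#   from langchain.text_splitter import CharacterTextSplitter
#   from langchain.vectorstores import Qdrant
#
#   # Split the source document into fragments before embedding.
#   texts = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_text(raw_text)
#
#   # Embed each fragment and store it under the named vector the app expects.
#   Qdrant.from_texts(
#       texts,
#       embeddings,
#       url="http://localhost:6333",
#       collection_name=collection_name,
#       vector_name="fragmentvector",
#   )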