Spaces:

scottsyms
/

Green_Procurement

Runtime error

App Files Files Community

Green_Procurement / app.py

scottsyms

Upload folder using huggingface_hub

7886e70 almost 2 years ago

raw

history blame

3.59 kB

	import os

	import gradio as gr
	import openai
	from pathlib import Path

	from langchain import PromptTemplate, LLMChain
	import qdrant_client
	from dotenv import load_dotenv
	# from langchain.chains.qa_with_sources import load_qa_with_sources_chain
	from langchain.chains import RetrievalQA
	from langchain.chat_models import ChatOpenAI
	from langchain.chains.question_answering import load_qa_chain
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.llms import AzureOpenAI, OpenAI
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.vectorstores import Qdrant
	from qdrant_client import QdrantClient

	# ---------------------------------------------------------------------------
	# Module-level configuration and client set-up.
	# ---------------------------------------------------------------------------

	# Name of the Qdrant collection that gets searched.
	# (An earlier revision pointed at "collectiveagreements" instead.)
	collection_name = "10ks"

	# Pull settings such as OPENAI_API_KEY into the process environment.
	load_dotenv()

	# Only the API key is configured. The Azure-specific settings are disabled:
	# openai.api_type = os.getenv("OPENAI_API_TYPE")
	# openai.api_base = os.getenv("OPENAI_API_BASE")
	# openai.api_version = os.getenv("OPENAI_API_VERSION")
	openai.api_key = os.getenv("OPENAI_API_KEY")

	# Per the original author, the stored data was vectorized with ADA, so the
	# same embedding model is used to turn each query into a vector.
	# (A `chunk_size=1` argument was tried here previously and is disabled.)
	embeddings = OpenAIEmbeddings(
	    model="text-embedding-ada-002",
	)

	# Qdrant connection: local instance, gRPC preferred over HTTP.
	dbclient = QdrantClient(
	    "localhost",
	    port=6333,
	    grpc_port=6334,
	    prefer_grpc=True,
	)

	# Vector store wrapper over the collection; the vectors are read from the
	# named vector "fragmentvector".
	# (A previous revision loaded a local FAISS index, "collectiveagreements.db",
	# at this point instead.)
	index = Qdrant(
	    client=dbclient,
	    collection_name=collection_name,
	    embeddings=embeddings,
	    vector_name="fragmentvector",
	)

	# Completion-model handle intended for rendering search results into text.
	# NOTE(review): nothing visible in this file reads this module-level `llm`;
	# confirm whether it is still needed.
	llm = OpenAI(
	    deployment_name="davinci",
	    temperature=0,
	)

	# A `load_qa_with_sources_chain(llm, chain_type="map_reduce")` chain was used
	# here in the past (chain type can be stuff, map_reduce or refine); it is
	# currently disabled.

	def docquery(question):
	    """Answer *question* from the indexed documents.

	    A RetrievalQA chain is built over the module-level ``index`` and run on
	    the question. The references are rendered from the documents the chain
	    itself retrieved (``return_source_documents=True``), so they always match
	    the answer and no second similarity search is issued.

	    Args:
	        question: The user's question as plain text.

	    Returns:
	        A ``(answer, references)`` tuple of Markdown strings. For an empty or
	        whitespace-only question the answer is a short prompt and the
	        references string is empty.
	    """
	    # Do not spend an embedding/LLM round trip on an empty submission.
	    if not question or not question.strip():
	        return "Please enter a question.", ""

	    # Process the query and return the results.
	    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
	    qa_chain = RetrievalQA.from_chain_type(
	        llm,
	        retriever=index.as_retriever(),
	        return_source_documents=True,
	    )
	    output = qa_chain({"query": question})
	    print("Is the error here?", output)

	    # Reuse the chain's own retrieval result instead of searching again.
	    docs = output.get("source_documents", [])
	    print("Length of answer: ", len(docs))

	    fragments = []
	    for doc in docs:
	        print("item: ", doc.page_content)
	        # Each entry: the (empty) bold marker the original emitted, then the
	        # fragment flattened onto one line, then a blank line.
	        fragments.append(
	            "**" + "** \n" + doc.page_content.replace("\n", "") + "\n\n"
	        )
	    references = "".join(fragments)

	    return output["result"], references

	# Gradio front end: one tab with a question box whose submit event calls
	# `docquery` and shows the answer and the reference fragments as Markdown.
	# The page title now matches the "10K filings search" heading below.
	with gr.Blocks(title="10K Filings Search") as blocks:
	    appname = gr.Markdown(value="# 10K filings search")
	    appdesc = gr.Markdown(
	        value="## The tabs below demonstrate different ways to query the data."
	    )

	    with gr.Tab("Ask a question"):
	        appdesc = gr.Markdown(
	            value="### This is a demo of an OpenAI-based question answering system. Type in a question and the system will return the answer and the source document."
	        )
	        question = gr.Textbox(
	            lines=1,
	            label="Question: press enter to submit",
	            value="Where is Babcock's head office?",
	        )
	        answer = gr.Markdown(label="Answer")
	        references = gr.Markdown(label="References")
	        # Pressing enter in the textbox runs the query; the two return values
	        # of `docquery` fill the two Markdown outputs in order.
	        question.submit(docquery, question, outputs=[answer, references])


	# Serve on all interfaces, port 8080, and request a public share link.
	# NOTE(review): confirm `share=True` and port 8080 suit the hosting
	# environment this app is deployed to.
	blocks.launch(share=True, server_name="0.0.0.0", server_port=8080)