Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- Dockerfile +24 -0
- README.md +3 -9
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +98 -0
Dockerfile
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10

WORKDIR /usr/src/app

# Install Python dependencies first. This layer only depends on
# requirements.txt, so it stays cached when the data, model or app code change.
COPY requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Data and model artifacts. COPY creates the destination directories,
# so no explicit `mkdir` is needed.
COPY ./greenprocurementdb2 ./greenprocurementdb2
COPY ./pickle ./pickle
COPY ./data ./data
COPY ./model ./model
COPY ./summaries ./summaries

# Application code goes last because it changes most often.
COPY ./app.py ./

# Port the container is expected to serve on (7860 is the default app port
# on Hugging Face Spaces).
EXPOSE 7860

CMD [ "python", "./app.py" ]
|
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji: 🦀
|
4 |
-
colorFrom: yellow
|
5 |
-
colorTo: purple
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 3.44.4
|
8 |
app_file: app.py
|
9 |
-
|
|
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Green_Procurement
|
|
|
|
|
|
|
|
|
|
|
3 |
app_file: app.py
|
4 |
+
sdk: gradio
|
5 |
+
sdk_version: 3.44.3
|
6 |
---
|
|
|
|
__pycache__/app.cpython-311.pyc
ADDED
Binary file (4.34 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
import openai
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
from langchain import PromptTemplate, LLMChain
|
8 |
+
import qdrant_client
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
# from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
11 |
+
from langchain.chains import RetrievalQA
|
12 |
+
from langchain.chat_models import ChatOpenAI
|
13 |
+
from langchain.chains.question_answering import load_qa_chain
|
14 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
15 |
+
from langchain.llms import AzureOpenAI, OpenAI
|
16 |
+
from langchain.text_splitter import CharacterTextSplitter
|
17 |
+
from langchain.vectorstores import Qdrant
|
18 |
+
from qdrant_client import QdrantClient
|
19 |
+
|
20 |
+
# Constants
# Name of the Qdrant collection that is queried.
collection_name = "10ks"
# collection_name="collectiveagreements"


# Load environment variables (including the OpenAI API key) from a .env file
load_dotenv()

# Initialize OpenAI. The commented-out settings appear to be the extra values
# required when targeting Azure OpenAI instead of the public OpenAI API.
# openai.api_type = os.getenv("OPENAI_API_TYPE")
# openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_key = os.getenv("OPENAI_API_KEY")
# openai.api_version = os.getenv("OPENAI_API_VERSION")

# The data was vectorized with ADA, so we'll use that to convert our
# query into a vector
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")  # , chunk_size=1)

# Qdrant connection. Host and ports can be overridden through the environment
# so the app also works when Qdrant is not running on the same machine; the
# defaults match the previous hard-coded values. `or` (rather than a getenv
# default) also covers variables that are set but empty.
dbclient = QdrantClient(
    os.getenv("QDRANT_HOST") or "localhost",
    port=int(os.getenv("QDRANT_PORT") or "6333"),
    grpc_port=int(os.getenv("QDRANT_GRPC_PORT") or "6334"),
    prefer_grpc=True,
)

# LangChain vector-store wrapper around the collection; queries are embedded
# with `embeddings` and matched against the "fragmentvector" named vector.
index = Qdrant(
    client=dbclient,
    collection_name=collection_name,
    embeddings=embeddings,
    vector_name="fragmentvector",
)

# # Load the FAISS index
# index = dbclient.retrieve. .load_local(
#     "collectiveagreements.db",
#     OpenAIEmbeddings(chunk_size=1, model="text-embedding-ada-002"),
# )

# Open a connection to render the search results into text - this uses davinci-002.
# NOTE(review): `deployment_name` looks like an Azure OpenAI setting; confirm it
# is intended for the plain OpenAI class. docquery() builds its own ChatOpenAI
# instance, so this object appears to be unused in this file.
llm = OpenAI(deployment_name="davinci", temperature=0)
|
50 |
+
|
51 |
+
# Open up a connection to do the querying
|
52 |
+
# Chain type can be stuff, map_reduce or refine
|
53 |
+
# chain = load_qa_with_sources_chain(llm, chain_type="map_reduce")
|
54 |
+
|
55 |
+
def docquery(question):
    """Answer a question from the indexed documents.

    Runs a retrieval QA chain (gpt-3.5-turbo over the Qdrant ``index``) and
    formats the documents the chain retrieved as a Markdown reference list.

    Args:
        question: Free-text question entered by the user.

    Returns:
        A ``(answer, references)`` tuple of strings. ``answer`` is the chain's
        ``result``; ``references`` contains the page content of each source
        document, newlines stripped, one entry per paragraph.
    """
    # Nothing to search for: avoid a pointless embedding + LLM round trip.
    if not question or not question.strip():
        return "Please enter a question.", ""

    chat_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(
        chat_llm,
        retriever=index.as_retriever(),
        return_source_documents=True,
    )
    output = qa_chain({"query": question})

    # Reuse the documents the chain already retrieved instead of running a
    # second similarity search: the references then always match what the
    # model was shown, and the query is embedded only once.
    docs = output["source_documents"]
    print("Number of source documents: ", len(docs))

    references = "".join(
        "**** \n" + doc.page_content.replace("\n", "") + "\n\n"
        for doc in docs
    )
    return output["result"], references
|
77 |
+
|
78 |
+
# Build the Gradio UI: a heading, a short description and a single tab with a
# question box whose submit event calls docquery().
with gr.Blocks(title="10K filings search") as blocks:
    appname = gr.Markdown(value="# 10K filings search")
    appdesc = gr.Markdown(
        value="## The tabs below demonstrate different ways to query the data."
    )

    with gr.Tab("Ask a question"):
        appdesc = gr.Markdown(
            value="### This is a demo of an OpenAI-based question answering system. Type in a question and the system will return the answer and the source document."
        )
        question = gr.Textbox(
            lines=1,
            label="Question: press enter to submit",
            value="Where is Babcock's head office?",
        )
        answer = gr.Markdown(label="Answer")
        references = gr.Markdown(label="References")
        # docquery returns (answer, references), matching the two outputs.
        question.submit(docquery, question, outputs=[answer, references])


# Serve on all interfaces. The port defaults to 7860 - the port exposed by the
# Dockerfile and the default app port on Hugging Face Spaces - and can be
# overridden with GRADIO_SERVER_PORT.
blocks.launch(
    share=True,
    server_name="0.0.0.0",
    server_port=int(os.getenv("GRADIO_SERVER_PORT") or "7860"),
)
|