scottsyms committed
Commit 7886e70 · Parent: e2c1c94

Upload folder using huggingface_hub
Files changed (4):
  1. Dockerfile +24 -0
  2. README.md +3 -9
  3. __pycache__/app.cpython-311.pyc +0 -0
  4. app.py +98 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
+ FROM python:3.10
+
+ WORKDIR /usr/src/app
+ # RUN mkdir /usr/src/app/greenprocurementdb2
+ # RUN mkdir /usr/src/app/pickle
+ # RUN mkdir /usr/src/app/data
+
+
+
+ COPY requirements.txt ./
+ COPY ./greenprocurementdb2 ./greenprocurementdb2
+ COPY ./pickle ./pickle
+ COPY ./data ./data
+ COPY ./model ./model
+ COPY ./summaries ./summaries
+
+ RUN pip install --upgrade pip
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY ./app.py ./
+
+ EXPOSE 7860
+
+ CMD [ "python", "./app.py" ]
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: Green Procurement
- emoji: 🦀
- colorFrom: yellow
- colorTo: purple
- sdk: gradio
- sdk_version: 3.44.4
+ title: Green_Procurement
  app_file: app.py
- pinned: false
+ sdk: gradio
+ sdk_version: 3.44.3
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/app.cpython-311.pyc ADDED
Binary file (4.34 kB).
 
app.py ADDED
@@ -0,0 +1,98 @@
+ import os
+
+ import gradio as gr
+ import openai
+ from pathlib import Path
+
+ from langchain import PromptTemplate, LLMChain
+ import qdrant_client
+ from dotenv import load_dotenv
+ # from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+ from langchain.chains import RetrievalQA
+ from langchain.chat_models import ChatOpenAI
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.llms import AzureOpenAI, OpenAI
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import Qdrant
+ from qdrant_client import QdrantClient
+
+ # Constants
+ collection_name = "10ks"
+ # collection_name = "collectiveagreements"
+
+
+ # Load the environment variables with the OpenAI API key
+ load_dotenv()
+
+ # Initialize OpenAI (the Azure settings are left commented out)
+ # openai.api_type = os.getenv("OPENAI_API_TYPE")
+ # openai.api_base = os.getenv("OPENAI_API_BASE")
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+ # openai.api_version = os.getenv("OPENAI_API_VERSION")
+
+ # The data was vectorized with ADA, so we'll use the same model to convert our
+ # query into a vector
+ embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")  # , chunk_size=1)
+
+ dbclient = QdrantClient("localhost", port=6333, grpc_port=6334, prefer_grpc=True)
+
+ index = Qdrant(client=dbclient, collection_name=collection_name, embeddings=embeddings, vector_name="fragmentvector")
+
+ # # Load the FAISS index
+ # index = dbclient.retrieve. .load_local(
+ #     "collectiveagreements.db",
+ #     OpenAIEmbeddings(chunk_size=1, model="text-embedding-ada-002"),
+ # )
+
+ # Open a connection to render the search results into text - this uses davinci-002.
+ llm = OpenAI(deployment_name="davinci", temperature=0)
+
+ # Open up a connection to do the querying
+ # Chain type can be stuff, map_reduce or refine
+ # chain = load_qa_with_sources_chain(llm, chain_type="map_reduce")
+
+ def docquery(question):
+     docs = index.similarity_search(question)
+     print("Length of answer: ", len(docs))
+     # Process the query and return the results
+     llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
+     qa_chain = RetrievalQA.from_chain_type(llm, retriever=index.as_retriever(), return_source_documents=True)
+     output = qa_chain({"query": question})
+     # output = chain.run(input_documents=docs, question=question)
+     print("Is the error here?", output)
+     # myanswer = "##" + output.split("SOURCES")[0]
+     references = ""
+     # print("Docs:", docs)
+     for i in docs:
+         print("item: ", i.page_content)
+         references = (
+             references
+             + "**"
+             + "** \n"
+             + i.page_content.replace("\n", "")
+             + "\n\n"
+         )
+     return output['result'], references
+
+ with gr.Blocks(title="Collective Agreement Search") as blocks:
+     appname = gr.Markdown(value="# 10K filings search")
+     appdesc = gr.Markdown(
+         value="## The tabs below demonstrate different ways to query the data."
+     )
+
+     with gr.Tab("Ask a question"):
+         appdesc = gr.Markdown(
+             value="### This is a demo of an OpenAI-based question answering system. Type in a question and the system will return the answer and the source document."
+         )
+         question = gr.Textbox(
+             lines=1,
+             label="Question: press enter to submit",
+             value="Where is Babcock's head office?",
+         )
+         answer = gr.Markdown(label="Answer")
+         references = gr.Markdown(label="References")
+         question.submit(docquery, question, outputs=[answer, references])
+
+
+ blocks.launch(share=True, server_name="0.0.0.0", server_port=8080)
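
app.py assumes a Qdrant collection named 10ks already exists on localhost, embedded with text-embedding-ada-002 under the named vector fragmentvector. The ingestion step is not part of this commit; below is a minimal sketch of how such a collection could be built with the same LangChain stack. The source file path and chunk sizes are assumptions, not taken from the repo:

```python
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Qdrant

# Split a source document into fragments; path and chunk sizes are hypothetical.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_text(open("data/sample_10k.txt").read())

# Qdrant.from_texts creates the collection if needed and upserts the embedded
# chunks; vector_name must match the "fragmentvector" that app.py queries.
Qdrant.from_texts(
    chunks,
    OpenAIEmbeddings(model="text-embedding-ada-002"),
    url="http://localhost:6333",
    prefer_grpc=True,
    collection_name="10ks",
    vector_name="fragmentvector",
)
```

Once the collection is populated, the RetrievalQA chain inside docquery can answer against it. Note also that blocks.launch passes share=True, which is typically unnecessary when the server is already exposed on a published container port.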