Spaces:
Runtime error
Runtime error
Joan Giner
commited on
Commit
·
5dddb18
1
Parent(s):
cfbb0ad
open version
Browse files- app.py +8 -4
- src/extractor.py +14 -8
app.py
CHANGED
@@ -21,8 +21,8 @@ from src.extractor import Extractor
|
|
21 |
load_dotenv()
|
22 |
|
23 |
## Your API key from vendors or Hugging Face
|
24 |
-
openai.api_key=os.getenv("OPEN_AI_API_KEY")
|
25 |
-
LLMClient = OpenAI(model_name='text-davinci-003', openai_api_key=openai.api_key,temperature=0)
|
26 |
extractor = Extractor()
|
27 |
|
28 |
# Define function to handle the Gradio interface
|
@@ -51,6 +51,10 @@ async def extraction(input_file, apikey, dimension):
|
|
51 |
return results, completeness_report
|
52 |
|
53 |
async def ui_extraction(input_file, apikey, dimension):
|
|
|
|
|
|
|
|
|
54 |
file_name = input_file.name.split("/")[-1]
|
55 |
results, completeness_report = await extractor.extraction(file_name, input_file.name, apikey, dimension)
|
56 |
# Build results in the correct format for the Gradio front-end
|
@@ -154,7 +158,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
|
154 |
|
155 |
""")
|
156 |
with gr.Column():
|
157 |
-
apikey_elem = gr.Text(label="OpenAI API key
|
158 |
# gr.Markdown("""
|
159 |
# <h3> Improving your data and assessing your dataset documentation </h3>
|
160 |
# The generated warnings also allow you to quickly check the completeness of the documentation and spot gaps in the document
|
@@ -261,5 +265,5 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
|
261 |
|
262 |
# Run the app
|
263 |
#demo.queue(concurrency_count=5,max_size=20).launch()
|
264 |
-
demo.launch(share=False,show_api=False
|
265 |
|
|
|
21 |
load_dotenv()
|
22 |
|
23 |
## Your API key from vendors or Hugging Face
|
24 |
+
#openai.api_key=os.getenv("OPEN_AI_API_KEY")
|
25 |
+
#LLMClient = OpenAI(model_name='text-davinci-003', openai_api_key=openai.api_key,temperature=0)
|
26 |
extractor = Extractor()
|
27 |
|
28 |
# Define function to handle the Gradio interface
|
|
|
51 |
return results, completeness_report
|
52 |
|
53 |
async def ui_extraction(input_file, apikey, dimension):
|
54 |
+
if (input_file == None):
|
55 |
+
raise gr.Error("Please upload a your data paper")
|
56 |
+
if (input_file.name.split(".")[-1] != "pdf"):
|
57 |
+
raise gr.Error("This is not a data paper, please uploead it in .pdf format")
|
58 |
file_name = input_file.name.split("/")[-1]
|
59 |
results, completeness_report = await extractor.extraction(file_name, input_file.name, apikey, dimension)
|
60 |
# Build results in the correct format for the Gradio front-end
|
|
|
158 |
|
159 |
""")
|
160 |
with gr.Column():
|
161 |
+
apikey_elem = gr.Text(label="OpenAI API key")
|
162 |
# gr.Markdown("""
|
163 |
# <h3> Improving your data and assessing your dataset documentation </h3>
|
164 |
# The generated warnings also allow you to quickly check the completeness of the documentation and spot gaps in the document
|
|
|
265 |
|
266 |
# Run the app
|
267 |
#demo.queue(concurrency_count=5,max_size=20).launch()
|
268 |
+
demo.launch(share=False,show_api=False)
|
269 |
|
src/extractor.py
CHANGED
@@ -65,8 +65,11 @@ class Extractor:
|
|
65 |
|
66 |
# Extract text from a PDF file using SciPDF and the GROBID service (a reachable GROBID server is required)
|
67 |
def extract_text_from_pdf(self, file_path):
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
70 |
finaltext = article_dict['title'] + " \n\n " + article_dict['authors'] + " \n\n Abstract: " + article_dict['abstract'] + " \n\n "
|
71 |
for section in article_dict['sections']:
|
72 |
sec = section['heading'] + ": "
|
@@ -109,9 +112,9 @@ class Extractor:
|
|
109 |
# Process text and get the embeddings
|
110 |
vectorspath = "./vectors/"+file_name
|
111 |
if not apikey:
|
112 |
-
apikey = openai.api_key
|
113 |
-
gr.Error("Please set your api key")
|
114 |
-
embeddings = OpenAIEmbeddings(openai_api_key=
|
115 |
if os.path.isfile(vectorspath+"/index.faiss"):
|
116 |
|
117 |
# file exists
|
@@ -147,9 +150,12 @@ class Extractor:
|
|
147 |
|
148 |
def build_chains(self, apikey):
|
149 |
if not apikey:
|
150 |
-
apikey = openai.api_key
|
151 |
-
gr.Error("Please set your
|
152 |
-
|
|
|
|
|
|
|
153 |
## In-context prompt
|
154 |
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
155 |
Question: {question}
|
|
|
65 |
|
66 |
# Extract text from a PDF file using SciPDF and the GROBID service (a reachable GROBID server is required)
|
67 |
def extract_text_from_pdf(self, file_path):
|
68 |
+
try:
|
69 |
+
article_dict = scipdf.parse_pdf_to_dict(file_path, soup=True,return_coordinates=False, grobid_url="https://kermitt2-grobid.hf.space") # return dictionary
|
70 |
+
print("PDF parsed")
|
71 |
+
except:
|
72 |
+
raise gr.Error("Error parsing PDF, please update your data paper in the correct format")
|
73 |
finaltext = article_dict['title'] + " \n\n " + article_dict['authors'] + " \n\n Abstract: " + article_dict['abstract'] + " \n\n "
|
74 |
for section in article_dict['sections']:
|
75 |
sec = section['heading'] + ": "
|
|
|
112 |
# Process text and get the embeddings
|
113 |
vectorspath = "./vectors/"+file_name
|
114 |
if not apikey:
|
115 |
+
#apikey = openai.api_key
|
116 |
+
raise gr.Error("Please set your api key")
|
117 |
+
embeddings = OpenAIEmbeddings(openai_api_key=apikey)
|
118 |
if os.path.isfile(vectorspath+"/index.faiss"):
|
119 |
|
120 |
# file exists
|
|
|
150 |
|
151 |
def build_chains(self, apikey):
|
152 |
if not apikey:
|
153 |
+
#apikey = openai.api_key
|
154 |
+
raise gr.Error("Please set your Api key")
|
155 |
+
try:
|
156 |
+
LLMClient = OpenAI(model_name='text-davinci-003',openai_api_key=apikey,temperature=0)
|
157 |
+
except:
|
158 |
+
raise gr.Error("Your Api key is not valid")
|
159 |
## In-context prompt
|
160 |
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
161 |
Question: {question}
|