Spaces:

JoanGiner
/

DataDoc_Analyzer

Runtime error

App Files Files Community

Joan Giner committed on Oct 3, 2023

Commit

5dddb18

1 Parent(s): cfbb0ad

open version

Browse files

Files changed (2) hide show

app.py +8 -4
src/extractor.py +14 -8

app.py CHANGED Viewed

@@ -21,8 +21,8 @@ from src.extractor import Extractor
 load_dotenv()
 ## Your API key from vendors or Hugging Face
-openai.api_key=os.getenv("OPEN_AI_API_KEY")
-LLMClient = OpenAI(model_name='text-davinci-003', openai_api_key=openai.api_key,temperature=0)
 extractor = Extractor()
 # Define function to handle the Gradio interface
@@ -51,6 +51,10 @@ async def extraction(input_file, apikey, dimension):
     return results, completeness_report
 async def ui_extraction(input_file, apikey, dimension):
         file_name = input_file.name.split("/")[-1]
         results, completeness_report = await extractor.extraction(file_name, input_file.name, apikey, dimension)
         # Build results in the correct format for the Gradio front-end
@@ -154,7 +158,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
          """)
         with gr.Column():
-            apikey_elem = gr.Text(label="OpenAI API key (Not needed during review)")
          #   gr.Markdown("""
          #                   <h3> Improving your data and assesing your dataset documentation </h3>
          #                   The generated warning also allows you quicly check the completeness of the documentation, and spotting gaps in the document
@@ -261,5 +265,5 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     # Run the app
     #demo.queue(concurrency_count=5,max_size=20).launch()
-    demo.launch(share=False,show_api=False,auth=("CIKM2023", "demodemo"))

 load_dotenv()
 ## Your API key from vendors or Hugging Face
+#openai.api_key=os.getenv("OPEN_AI_API_KEY")
+#LLMClient = OpenAI(model_name='text-davinci-003', openai_api_key=openai.api_key,temperature=0)
 extractor = Extractor()
 # Define function to handle the Gradio interface
     return results, completeness_report
 async def ui_extraction(input_file, apikey, dimension):
+        if (input_file == None):
+            raise gr.Error("Please upload a your data paper")
+        if (input_file.name.split(".")[-1] != "pdf"):
+            raise gr.Error("This is not a data paper, please uploead it in .pdf format")
         file_name = input_file.name.split("/")[-1]
         results, completeness_report = await extractor.extraction(file_name, input_file.name, apikey, dimension)
         # Build results in the correct format for the Gradio front-end
          """)
         with gr.Column():
+            apikey_elem = gr.Text(label="OpenAI API key")
          #   gr.Markdown("""
          #                   <h3> Improving your data and assesing your dataset documentation </h3>
          #                   The generated warning also allows you quicly check the completeness of the documentation, and spotting gaps in the document
     # Run the app
     #demo.queue(concurrency_count=5,max_size=20).launch()
+    demo.launch(share=False,show_api=False)

src/extractor.py CHANGED Viewed

@@ -65,8 +65,11 @@ class Extractor:
     # Extract text from PDF file using SciPDF and the GROBID service (you need GROBID to use it)
     def extract_text_from_pdf(self, file_path):
-        article_dict = scipdf.parse_pdf_to_dict(file_path, soup=True,return_coordinates=False, grobid_url="https://kermitt2-grobid.hf.space") # return dictionary
-        print("PDF parsed")
         finaltext = article_dict['title'] + " \n\n " + article_dict['authors'] + " \n\n Abstract: " + article_dict['abstract'] + " \n\n "
         for section in article_dict['sections']:
             sec = section['heading'] + ": "
@@ -109,9 +112,9 @@ class Extractor:
         # Process text and get the embeddings
         vectorspath = "./vectors/"+file_name
         if not apikey:
-            apikey = openai.api_key
-            gr.Error("Please set your api key")
-        embeddings = OpenAIEmbeddings(openai_api_key=openai.api_key)
         if os.path.isfile(vectorspath+"/index.faiss"):
             # file exists
@@ -147,9 +150,12 @@ class Extractor:
     def build_chains(self, apikey):
         if not apikey:
-            apikey = openai.api_key
-            gr.Error("Please set your api key")
-        LLMClient = OpenAI(model_name='text-davinci-003',openai_api_key=apikey,temperature=0)
         ## In-context prompt
         prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
         Question: {question}

     # Extract text from PDF file using SciPDF and the GROBID service (you need GROBID to use it)
     def extract_text_from_pdf(self, file_path):
+        try:
+            article_dict = scipdf.parse_pdf_to_dict(file_path, soup=True,return_coordinates=False, grobid_url="https://kermitt2-grobid.hf.space") # return dictionary
+            print("PDF parsed")
+        except:
+            raise gr.Error("Error parsing PDF, please update your data paper in the correct format")
         finaltext = article_dict['title'] + " \n\n " + article_dict['authors'] + " \n\n Abstract: " + article_dict['abstract'] + " \n\n "
         for section in article_dict['sections']:
             sec = section['heading'] + ": "
         # Process text and get the embeddings
         vectorspath = "./vectors/"+file_name
         if not apikey:
+            #apikey = openai.api_key
+            raise gr.Error("Please set your api key")
+        embeddings = OpenAIEmbeddings(openai_api_key=apikey)
         if os.path.isfile(vectorspath+"/index.faiss"):
             # file exists
     def build_chains(self, apikey):
         if not apikey:
+            #apikey = openai.api_key
+            raise gr.Error("Please set your Api key")
+        try:
+            LLMClient = OpenAI(model_name='text-davinci-003',openai_api_key=apikey,temperature=0)
+        except:
+            raise gr.Error("Your Api key is not valid")
         ## In-context prompt
         prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
         Question: {question}