Something

Running

App Files Files Community

Pclanglais committed on Jul 6, 2024

Commit

7a72935

verified ·

1 Parent(s): e03a3c9

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -5

app.py CHANGED Viewed

@@ -110,6 +110,29 @@ def split_text(text, max_tokens=500):
     return chunks
 def transform_chunks(marianne_segmentation):
     marianne_segmentation = pd.DataFrame(marianne_segmentation)
     marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
@@ -118,11 +141,23 @@ def transform_chunks(marianne_segmentation):
     marianne_segmentation = marianne_segmentation[marianne_segmentation['word'].notna() & (marianne_segmentation['word'] != '') & (marianne_segmentation['word'] != ' ')]
     html_output = []
     for _, row in marianne_segmentation.iterrows():
         entity_group = row['entity_group']
         result_entity = "[" + entity_group.capitalize() + "]"
         word = row['word']
         if entity_group == 'title':
             html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content title-content"><h2>{word}</h2></div></div>')
         elif entity_group == 'bibliography':
@@ -132,8 +167,10 @@ def transform_chunks(marianne_segmentation):
         else:
             html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content">{word}</div></div>')
     final_html = '\n'.join(html_output)
-    return final_html
 # Class to encapsulate the Falcon chatbot
@@ -157,9 +194,9 @@ class MistralChatBot:
             classified_list.append(df)
         classified_list = pd.concat(classified_list)
-        out = transform_chunks(classified_list)
-        generated_text = f'{css}<h2 style="text-align:center">Edited text</h2>\n<div class="generation">{out}</div>'
-        return generated_text
 # Create the Falcon chatbot instance
 mistral_bot = MistralChatBot()
@@ -181,7 +218,8 @@ with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
     text_input = gr.Textbox(label="Your text", type="text", lines=1)
     text_button = gr.Button("Extract a structured bibtex")
     text_output = gr.HTML(label="Metadata")
-    text_button.click(mistral_bot.predict, inputs=text_input, outputs=[text_output])
 if __name__ == "__main__":
     demo.queue().launch()

     return chunks
+def create_bibtex_entry(data):
+    author = data.get('Author', '')
+    title = data.get('Title', '')
+    journal = data.get('Journal', '')
+    year = data.get('Year', '')
+    volume = data.get('Volume', '')
+    pages = data.get('Pages', '')
+    doi = data.get('Doi', '')
+    # Remove "doi: " prefix if present
+    doi = doi.replace('doi: ', '')
+    bibtex = f"""@article{{idnothing,
+  author = {{{author}}},
+  title = {{{title}}},
+  journal = {{{journal}}},
+  year = {{{year}}},
+  volume = {{{volume}}},
+  pages = {{{pages}}},
+  doi = {{{doi}}}
+}}"""
+    return bibtex
 def transform_chunks(marianne_segmentation):
     marianne_segmentation = pd.DataFrame(marianne_segmentation)
     marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
     marianne_segmentation = marianne_segmentation[marianne_segmentation['word'].notna() & (marianne_segmentation['word'] != '') & (marianne_segmentation['word'] != ' ')]
     html_output = []
+    bibtex_data = {}
+    current_entity = None
     for _, row in marianne_segmentation.iterrows():
         entity_group = row['entity_group']
         result_entity = "[" + entity_group.capitalize() + "]"
         word = row['word']
+        if entity_group in ['Author', 'Title', 'Journal', 'Pages', 'Doi']:
+            if entity_group in bibtex_data:
+                bibtex_data[entity_group] += ' ' + word
+            else:
+                bibtex_data[entity_group] = word
+            current_entity = entity_group
+        elif entity_group == 'None' and current_entity:
+            bibtex_data[current_entity] += ' ' + word
         if entity_group == 'title':
             html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content title-content"><h2>{word}</h2></div></div>')
         elif entity_group == 'bibliography':
         else:
             html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content">{word}</div></div>')
+    bibtex_entry = create_bibtex_entry(bibtex_data)
     final_html = '\n'.join(html_output)
+    return final_html, bibtex_entry
 # Class to encapsulate the Falcon chatbot
             classified_list.append(df)
         classified_list = pd.concat(classified_list)
+        html_output, bibtex_entry = transform_chunks(classified_list)
+        generated_text = f'{css}<h2 style="text-align:center">Edited text</h2>\n<div class="generation">{html_output}</div>'
+        return generated_text, bibtex_entry
 # Create the Falcon chatbot instance
 mistral_bot = MistralChatBot()
     text_input = gr.Textbox(label="Your text", type="text", lines=1)
     text_button = gr.Button("Extract a structured bibtex")
     text_output = gr.HTML(label="Metadata")
+    bibtex_output = gr.Textbox(label="BibTeX Entry", lines=10)
+    text_button.click(mistral_bot.predict, inputs=text_input, outputs=[text_output, bibtex_output])
 if __name__ == "__main__":
     demo.queue().launch()