Pclanglais committed on
Commit
7a72935
·
verified ·
1 Parent(s): e03a3c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -5
app.py CHANGED
@@ -110,6 +110,29 @@ def split_text(text, max_tokens=500):
110
 
111
  return chunks
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  def transform_chunks(marianne_segmentation):
114
  marianne_segmentation = pd.DataFrame(marianne_segmentation)
115
  marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
@@ -118,11 +141,23 @@ def transform_chunks(marianne_segmentation):
118
  marianne_segmentation = marianne_segmentation[marianne_segmentation['word'].notna() & (marianne_segmentation['word'] != '') & (marianne_segmentation['word'] != ' ')]
119
 
120
  html_output = []
 
 
 
121
  for _, row in marianne_segmentation.iterrows():
122
  entity_group = row['entity_group']
123
  result_entity = "[" + entity_group.capitalize() + "]"
124
  word = row['word']
125
 
 
 
 
 
 
 
 
 
 
126
  if entity_group == 'title':
127
  html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content title-content"><h2>{word}</h2></div></div>')
128
  elif entity_group == 'bibliography':
@@ -132,8 +167,10 @@ def transform_chunks(marianne_segmentation):
132
  else:
133
  html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content">{word}</div></div>')
134
 
 
 
135
  final_html = '\n'.join(html_output)
136
- return final_html
137
 
138
 
139
  # Class to encapsulate the Mistral chatbot
@@ -157,9 +194,9 @@ class MistralChatBot:
157
  classified_list.append(df)
158
 
159
  classified_list = pd.concat(classified_list)
160
- out = transform_chunks(classified_list)
161
- generated_text = f'{css}<h2 style="text-align:center">Edited text</h2>\n<div class="generation">{out}</div>'
162
- return generated_text
163
 
164
  # Create the Mistral chatbot instance
165
  mistral_bot = MistralChatBot()
@@ -181,7 +218,8 @@ with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
181
  text_input = gr.Textbox(label="Your text", type="text", lines=1)
182
  text_button = gr.Button("Extract a structured bibtex")
183
  text_output = gr.HTML(label="Metadata")
184
- text_button.click(mistral_bot.predict, inputs=text_input, outputs=[text_output])
 
185
 
186
  if __name__ == "__main__":
187
  demo.queue().launch()
 
110
 
111
  return chunks
112
 
113
def create_bibtex_entry(data):
    """Render extracted bibliographic fields as a BibTeX ``@article`` entry.

    Args:
        data: Mapping of field name to text. The keys read are ``'Author'``,
            ``'Title'``, ``'Journal'``, ``'Year'``, ``'Volume'``, ``'Pages'``
            and ``'Doi'``. Missing keys and ``None`` values are rendered as
            empty fields.

    Returns:
        The BibTeX entry as a multi-line string. The citation key is the
        fixed placeholder ``idnothing``.
    """
    # Kept local so the function does not rely on a module-level import.
    import re

    def field(key):
        # A key that is present but holds None must not crash the formatter.
        value = data.get(key)
        return '' if value is None else str(value)

    # Strip a leading "doi:" label only: case-insensitive, optional spaces,
    # anchored at the start so the identifier itself is never altered.
    doi = re.sub(r'^\s*doi\s*:\s*', '', field('Doi'), flags=re.IGNORECASE).strip()

    fields = [
        ('author', field('Author')),
        ('title', field('Title')),
        ('journal', field('Journal')),
        ('year', field('Year')),
        ('volume', field('Volume')),
        ('pages', field('Pages')),
        ('doi', doi),
    ]
    body = ',\n'.join(f'  {name} = {{{value}}}' for name, value in fields)
    return f'@article{{idnothing,\n{body}\n}}'
135
+
136
  def transform_chunks(marianne_segmentation):
137
  marianne_segmentation = pd.DataFrame(marianne_segmentation)
138
  marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
 
141
  marianne_segmentation = marianne_segmentation[marianne_segmentation['word'].notna() & (marianne_segmentation['word'] != '') & (marianne_segmentation['word'] != ' ')]
142
 
143
  html_output = []
144
+ bibtex_data = {}
145
+ current_entity = None
146
+
147
  for _, row in marianne_segmentation.iterrows():
148
  entity_group = row['entity_group']
149
  result_entity = "[" + entity_group.capitalize() + "]"
150
  word = row['word']
151
 
152
+ if entity_group in ['Author', 'Title', 'Journal', 'Pages', 'Doi']:
153
+ if entity_group in bibtex_data:
154
+ bibtex_data[entity_group] += ' ' + word
155
+ else:
156
+ bibtex_data[entity_group] = word
157
+ current_entity = entity_group
158
+ elif entity_group == 'None' and current_entity:
159
+ bibtex_data[current_entity] += ' ' + word
160
+
161
  if entity_group == 'title':
162
  html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content title-content"><h2>{word}</h2></div></div>')
163
  elif entity_group == 'bibliography':
 
167
  else:
168
  html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content">{word}</div></div>')
169
 
170
+ bibtex_entry = create_bibtex_entry(bibtex_data)
171
+
172
  final_html = '\n'.join(html_output)
173
+ return final_html, bibtex_entry
174
 
175
 
176
  # Class to encapsulate the Mistral chatbot
 
194
  classified_list.append(df)
195
 
196
  classified_list = pd.concat(classified_list)
197
+ html_output, bibtex_entry = transform_chunks(classified_list)
198
+ generated_text = f'{css}<h2 style="text-align:center">Edited text</h2>\n<div class="generation">{html_output}</div>'
199
+ return generated_text, bibtex_entry
200
 
201
  # Create the Mistral chatbot instance
202
  mistral_bot = MistralChatBot()
 
218
  text_input = gr.Textbox(label="Your text", type="text", lines=1)
219
  text_button = gr.Button("Extract a structured bibtex")
220
  text_output = gr.HTML(label="Metadata")
221
+ bibtex_output = gr.Textbox(label="BibTeX Entry", lines=10)
222
+ text_button.click(mistral_bot.predict, inputs=text_input, outputs=[text_output, bibtex_output])
223
 
224
  if __name__ == "__main__":
225
  demo.queue().launch()