Pclanglais committed on
Commit
589a379
·
verified ·
1 Parent(s): da15460

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -34
app.py CHANGED
@@ -111,27 +111,11 @@ def split_text(text, max_tokens=500):
111
  return chunks
112
 
113
  def create_bibtex_entry(data):
114
- author = data.get('Author', '').strip()
115
- title = data.get('Title', '').strip()
116
- journal = data.get('Journal', '').strip()
117
- year = data.get('Year', '').strip()
118
- volume = data.get('Volume', '').strip()
119
- pages = data.get('Pages', '').strip()
120
- doi = data.get('Doi', '').strip()
121
-
122
- # Remove "doi: " prefix if present
123
- doi = doi.replace('doi: ', '')
124
-
125
  bibtex = "@article{idnothing,\n"
126
- if author: bibtex += f" author = {{{author}}},\n"
127
- if title: bibtex += f" title = {{{title}}},\n"
128
- if journal: bibtex += f" journal = {{{journal}}},\n"
129
- if year: bibtex += f" year = {{{year}}},\n"
130
- if volume: bibtex += f" volume = {{{volume}}},\n"
131
- if pages: bibtex += f" pages = {{{pages}}},\n"
132
- if doi: bibtex += f" doi = {{{doi}}},\n"
133
- bibtex += "}"
134
-
135
  return bibtex
136
 
137
 
@@ -151,13 +135,13 @@ def transform_chunks(marianne_segmentation):
151
  result_entity = "[" + entity_group.capitalize() + "]"
152
  word = row['word']
153
 
154
- if entity_group in ['Author', 'Title', 'Journal', 'Pages', 'Doi']:
155
  if entity_group in bibtex_data:
156
  bibtex_data[entity_group] += ' ' + word
157
  else:
158
  bibtex_data[entity_group] = word
159
  current_entity = entity_group
160
- elif entity_group == 'None':
161
  if current_entity:
162
  bibtex_data[current_entity] += ' ' + word
163
  else:
@@ -165,21 +149,11 @@ def transform_chunks(marianne_segmentation):
165
 
166
  html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content">{word}</div></div>')
167
 
168
- # Extract year from the 'None' field if present
169
- none_content = bibtex_data.get('None', '')
170
- year_match = re.search(r'\((\d{4})\)', none_content)
171
- if year_match:
172
- bibtex_data['Year'] = year_match.group(1)
173
-
174
- # Extract volume from the 'None' field if present
175
- volume_match = re.search(r',\s*(\d+),', none_content)
176
- if volume_match:
177
- bibtex_data['Volume'] = volume_match.group(1)
178
-
179
  bibtex_entry = create_bibtex_entry(bibtex_data)
180
 
181
  final_html = '\n'.join(html_output)
182
  return final_html, bibtex_entry
 
183
 
184
 
185
  # Class to encapsulate the Falcon chatbot
@@ -203,7 +177,19 @@ class MistralChatBot:
203
  classified_list.append(df)
204
 
205
  classified_list = pd.concat(classified_list)
 
 
 
 
 
206
  html_output, bibtex_entry = transform_chunks(classified_list)
 
 
 
 
 
 
 
207
  generated_text = f'{css}<h2 style="text-align:center">Edited text</h2>\n<div class="generation">{html_output}</div>'
208
  return generated_text, bibtex_entry
209
 
@@ -224,7 +210,7 @@ demo = gr.Blocks()
224
 
225
  with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
226
  gr.HTML("""<h1 style="text-align:center">Reversed Zotero</h1>""")
227
- text_input = gr.Textbox(label="Your text", type="text", lines=1)
228
  text_button = gr.Button("Extract a structured bibtex")
229
  text_output = gr.HTML(label="Metadata")
230
  bibtex_output = gr.Textbox(label="BibTeX Entry", lines=10)
 
111
  return chunks
112
 
113
  def create_bibtex_entry(data):
 
 
 
 
 
 
 
 
 
 
 
114
  bibtex = "@article{idnothing,\n"
115
+ for key, value in data.items():
116
+ if key != 'None' and value.strip():
117
+ bibtex += f" {key.lower()} = {{{value.strip()}}},\n"
118
+ bibtex = bibtex.rstrip(',\n') + "\n}"
 
 
 
 
 
119
  return bibtex
120
 
121
 
 
135
  result_entity = "[" + entity_group.capitalize() + "]"
136
  word = row['word']
137
 
138
+ if entity_group != 'None':
139
  if entity_group in bibtex_data:
140
  bibtex_data[entity_group] += ' ' + word
141
  else:
142
  bibtex_data[entity_group] = word
143
  current_entity = entity_group
144
+ else:
145
  if current_entity:
146
  bibtex_data[current_entity] += ' ' + word
147
  else:
 
149
 
150
  html_output.append(f'<div class="manuscript"><div class="annotation">{result_entity}</div><div class="content">{word}</div></div>')
151
 
 
 
 
 
 
 
 
 
 
 
 
152
  bibtex_entry = create_bibtex_entry(bibtex_data)
153
 
154
  final_html = '\n'.join(html_output)
155
  return final_html, bibtex_entry
156
+
157
 
158
 
159
  # Class to encapsulate the Falcon chatbot
 
177
  classified_list.append(df)
178
 
179
  classified_list = pd.concat(classified_list)
180
+
181
+ # Debugging: Print the classified list
182
+ print("Classified List:")
183
+ print(classified_list)
184
+
185
  html_output, bibtex_entry = transform_chunks(classified_list)
186
+
187
+ # Debugging: Print the outputs
188
+ print("HTML Output:")
189
+ print(html_output)
190
+ print("BibTeX Entry:")
191
+ print(bibtex_entry)
192
+
193
  generated_text = f'{css}<h2 style="text-align:center">Edited text</h2>\n<div class="generation">{html_output}</div>'
194
  return generated_text, bibtex_entry
195
 
 
210
 
211
  with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
212
  gr.HTML("""<h1 style="text-align:center">Reversed Zotero</h1>""")
213
+ text_input = gr.Textbox(label="Your text", type="text", lines=5)
214
  text_button = gr.Button("Extract a structured bibtex")
215
  text_output = gr.HTML(label="Metadata")
216
  bibtex_output = gr.Textbox(label="BibTeX Entry", lines=10)