Pclanglais committed on
Commit
033a22d
·
verified ·
1 Parent(s): 589a379

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -5
app.py CHANGED
@@ -110,15 +110,37 @@ def split_text(text, max_tokens=500):
110
 
111
  return chunks
112
 
 
 
 
 
113
  def create_bibtex_entry(data):
114
- bibtex = "@article{idnothing,\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  for key, value in data.items():
116
- if key != 'None' and value.strip():
117
  bibtex += f" {key.lower()} = {{{value.strip()}}},\n"
118
  bibtex = bibtex.rstrip(',\n') + "\n}"
119
  return bibtex
120
 
121
-
122
  def transform_chunks(marianne_segmentation):
123
  marianne_segmentation = pd.DataFrame(marianne_segmentation)
124
  marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
@@ -153,8 +175,6 @@ def transform_chunks(marianne_segmentation):
153
 
154
  final_html = '\n'.join(html_output)
155
  return final_html, bibtex_entry
156
-
157
-
158
 
159
  # Class to encapsulate the Mistral chatbot
160
  class MistralChatBot:
 
110
 
111
  return chunks
112
 
113
def extract_year(text):
    """Return the first standalone four-digit number found in *text*.

    Args:
        text: Free-form string to scan. Any non-string value (including
            ``None``) is treated as containing no year.

    Returns:
        The matched four digits as a string, or ``None`` when *text* is not
        a string or contains no four-digit number delimited by word
        boundaries.
    """
    # Guard first: re.search raises TypeError on None / non-string input.
    if not isinstance(text, str):
        return None
    year_match = re.search(r'\b(\d{4})\b', text)
    return year_match.group(1) if year_match else None
116
+
117
def create_bibtex_entry(data):
    """Build a BibTeX entry string from a mapping of field names to values.

    The entry type is chosen from the keys present: ``'Journal'`` gives
    ``article``, otherwise ``'Booktitle'`` gives ``incollection``, otherwise
    ``book``. The ``'None'`` key is never emitted as a field; when no usable
    ``'Year'`` is supplied, its content is scanned with ``extract_year`` and
    any year found is added as the ``Year`` field.

    The citation key is the first author word followed by the year, keeping
    only alphanumeric characters; ``'Unknown'`` is used when no author word
    is usable.

    Args:
        data: Mapping of field name to value. It is not modified.

    Returns:
        The BibTeX entry as a string. Fields whose value is ``None`` or
        blank after stripping are omitted.
    """
    def _as_text(value):
        # Missing values count as empty so they are skipped instead of
        # raising AttributeError on .strip().
        return '' if value is None else str(value).strip()

    def _key_safe(word):
        # Commas, braces and whitespace would terminate or corrupt the
        # citation key, so only letters and digits are kept.
        return ''.join(ch for ch in word if ch.isalnum())

    # Work on a shallow copy: the caller's mapping must stay untouched.
    fields = dict(data)

    # Determine the entry type
    if 'Journal' in fields:
        entry_type = 'article'
    elif 'Booktitle' in fields:
        entry_type = 'incollection'
    else:
        entry_type = 'book'

    # Prefer an explicit Year; fall back to a year found in the 'None' text.
    unlabelled = _as_text(fields.pop('None', ''))
    year = _as_text(fields.get('Year'))
    if not year and unlabelled:
        year = extract_year(unlabelled) or ''
        if year:
            fields['Year'] = year

    # Create BibTeX ID
    author_words = _as_text(fields.get('Author')).split()
    first_author = next(
        (cleaned for cleaned in map(_key_safe, author_words) if cleaned),
        'Unknown',
    )
    bibtex_id = first_author + _key_safe(year)

    field_lines = []
    for key, value in fields.items():
        text = _as_text(value)
        if text:
            field_lines.append(f" {key.lower()} = {{{text}}}")

    header = f"@{entry_type}{{{bibtex_id}"
    if not field_lines:
        return header + "\n}"
    return header + ",\n" + ",\n".join(field_lines) + "\n}"
143
 
 
144
  def transform_chunks(marianne_segmentation):
145
  marianne_segmentation = pd.DataFrame(marianne_segmentation)
146
  marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
 
175
 
176
  final_html = '\n'.join(html_output)
177
  return final_html, bibtex_entry
 
 
178
 
179
  # Class to encapsulate the Mistral chatbot
180
  class MistralChatBot: