Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -110,15 +110,37 @@ def split_text(text, max_tokens=500):
|
|
110 |
|
111 |
return chunks
|
112 |
|
|
|
|
|
|
|
|
|
113 |
def create_bibtex_entry(data):
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
for key, value in data.items():
|
116 |
-
if
|
117 |
bibtex += f" {key.lower()} = {{{value.strip()}}},\n"
|
118 |
bibtex = bibtex.rstrip(',\n') + "\n}"
|
119 |
return bibtex
|
120 |
|
121 |
-
|
122 |
def transform_chunks(marianne_segmentation):
|
123 |
marianne_segmentation = pd.DataFrame(marianne_segmentation)
|
124 |
marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
|
@@ -153,8 +175,6 @@ def transform_chunks(marianne_segmentation):
|
|
153 |
|
154 |
final_html = '\n'.join(html_output)
|
155 |
return final_html, bibtex_entry
|
156 |
-
|
157 |
-
|
158 |
|
159 |
# Class to encapsulate the Mistral chatbot
|
160 |
class MistralChatBot:
|
|
|
110 |
|
111 |
return chunks
|
112 |
|
113 |
+
def extract_year(text):
    """Return the first standalone four-digit number found in *text*.

    The match must be delimited by word boundaries, so a four-digit run that
    is part of a longer number or word (e.g. ``12345``) is ignored.

    Args:
        text: The string to search. Non-string input (for example ``None``
            for a missing field) is accepted and treated as containing no
            year.

    Returns:
        The matched digits as a string, or ``None`` when nothing matches.
    """
    # re.search raises TypeError on non-string input; a missing value simply
    # has no year to report.
    if not isinstance(text, str):
        return None
    year_match = re.search(r'\b(\d{4})\b', text)
    return year_match.group(1) if year_match else None
|
116 |
+
|
117 |
def _field_text(value):
    """Return *value* as stripped text; ``None`` becomes the empty string."""
    if value is None:
        return ''
    return str(value).strip()


def _citation_key_part(word):
    """Keep only the characters of *word* that are safe in a citation key."""
    return ''.join(ch for ch in word if ch.isalnum() or ch in '-_')


def create_bibtex_entry(data):
    """Build a BibTeX entry string from a dict of bibliographic fields.

    The entry type is ``article`` when a ``'Journal'`` key is present,
    ``incollection`` when a ``'Booktitle'`` key is present, and ``book``
    otherwise. The citation key is the first word of ``'Author'`` (reduced to
    letters, digits, ``-`` and ``_``; ``Unknown`` when no usable word exists)
    followed by the year when one is known.

    Args:
        data: Mapping of field name to value. A ``'None'`` key, if present,
            is not emitted as a field; it is only searched for a year
            (presumably it holds text the upstream tagger left unlabelled --
            confirm against the caller). The mapping is not modified.

    Returns:
        The BibTeX entry as a string. Fields whose value is blank are
        omitted; field names are lower-cased.
    """
    # Work on a copy: popping 'None' and inserting 'Year' must not leak back
    # into the caller's dict.
    fields = dict(data)

    # Determine the entry type.
    if 'Journal' in fields:
        entry_type = 'article'
    elif 'Booktitle' in fields:
        entry_type = 'incollection'
    else:
        entry_type = 'book'

    # Use a year found in the 'None' content only when no explicit 'Year'
    # field was supplied.
    fallback_year = extract_year(_field_text(fields.pop('None', '')))
    if fallback_year and 'Year' not in fields:
        fields['Year'] = fallback_year

    # The citation key should agree with the year field that is emitted, so
    # prefer the explicit 'Year' and fall back to the extracted one.
    id_year = extract_year(_field_text(fields.get('Year'))) or fallback_year

    # Create the BibTeX ID. Punctuation such as the comma in "Smith, John"
    # would terminate the key early, so it is stripped.
    author_words = _field_text(fields.get('Author')).split()
    first_author = next(
        (part for part in map(_citation_key_part, author_words) if part),
        'Unknown',
    )
    bibtex_id = f"{first_author}{id_year}" if id_year else first_author

    header = f"@{entry_type}{{{bibtex_id},"
    field_lines = []
    for key, value in fields.items():
        text = _field_text(value)
        if text:
            field_lines.append(f" {key.lower()} = {{{text}}}")

    # With no fields the comma after the key must be kept for the entry to
    # remain well-formed.
    if not field_lines:
        return f"{header}\n}}"
    return header + "\n" + ",\n".join(field_lines) + "\n}"
|
143 |
|
|
|
144 |
def transform_chunks(marianne_segmentation):
|
145 |
marianne_segmentation = pd.DataFrame(marianne_segmentation)
|
146 |
marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
|
|
|
175 |
|
176 |
final_html = '\n'.join(html_output)
|
177 |
return final_html, bibtex_entry
|
|
|
|
|
178 |
|
179 |
# Class to encapsulate the Mistral chatbot
|
180 |
class MistralChatBot:
|