Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -110,6 +110,9 @@ def split_text(text, max_tokens=500):
|
|
| 110 |
|
| 111 |
return chunks
|
| 112 |
|
|
|
|
|
|
|
|
|
|
| 113 |
def extract_year(text):
    """Return the first standalone four-digit number found in *text*.

    The match must be delimited by word boundaries, so a four-digit run
    embedded in a longer number (e.g. ``12345``) is ignored. No range check
    is applied: any four-digit token is accepted as a year.

    Args:
        text: String to scan. ``None`` or an empty string is tolerated.

    Returns:
        The four digits as a string, or ``None`` when nothing matches.
    """
    # Guard against None/empty input so a missing field does not raise
    # TypeError inside re.search.
    if not text:
        return None
    year_match = re.search(r'\b(\d{4})\b', text)
    return year_match.group(1) if year_match else None
|
|
@@ -133,10 +136,13 @@ def create_bibtex_entry(data):
|
|
| 133 |
author_words = data.get('author', '').split()
|
| 134 |
first_author = author_words[0] if author_words else 'Unknown'
|
| 135 |
bibtex_id = f"{first_author}{year}" if year else first_author
|
|
|
|
| 136 |
|
| 137 |
bibtex = f"@{entry_type}{{{bibtex_id},\n"
|
| 138 |
for key, value in data.items():
|
| 139 |
if value.strip():
|
|
|
|
|
|
|
| 140 |
bibtex += f" {key.lower()} = {{{value.strip()}}},\n"
|
| 141 |
bibtex = bibtex.rstrip(',\n') + "\n}"
|
| 142 |
return bibtex
|
|
@@ -210,8 +216,7 @@ class MistralChatBot:
|
|
| 210 |
print("BibTeX Entry:")
|
| 211 |
print(bibtex_entry)
|
| 212 |
|
| 213 |
-
|
| 214 |
-
return generated_text, bibtex_entry
|
| 215 |
|
| 216 |
# Create the Mistral chatbot instance
|
| 217 |
mistral_bot = MistralChatBot()
|
|
@@ -232,9 +237,8 @@ with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
|
|
| 232 |
gr.HTML("""<h1 style="text-align:center">Reversed Zotero</h1>""")
|
| 233 |
text_input = gr.Textbox(label="Your text", type="text", lines=5)
|
| 234 |
text_button = gr.Button("Extract a structured bibtex")
|
| 235 |
-
text_output = gr.HTML(label="Metadata")
|
| 236 |
bibtex_output = gr.Textbox(label="BibTeX Entry", lines=10)
|
| 237 |
-
text_button.click(mistral_bot.predict, inputs=text_input, outputs=[
|
| 238 |
|
| 239 |
if __name__ == "__main__":
|
| 240 |
demo.queue().launch()
|
|
|
|
| 110 |
|
| 111 |
return chunks
|
| 112 |
|
| 113 |
+
def remove_punctuation(text):
    """Return *text* with every non-word, non-whitespace character removed.

    Letters, digits, underscores and whitespace are kept, because the
    regex word class includes the underscore. Everything else (commas,
    hyphens, periods, braces, ...) is dropped, so ``"123-145"`` becomes
    ``"123145"``.

    Args:
        text: String to clean.

    Returns:
        The cleaned string.
    """
    return re.sub(r'[^\w\s]', '', text)
|
| 115 |
+
|
| 116 |
def extract_year(text):
    """Return the first standalone four-digit number found in *text*.

    The match must be delimited by word boundaries, so a four-digit run
    embedded in a longer number (e.g. ``12345``) is ignored. No range check
    is applied: any four-digit token is accepted as a year.

    Args:
        text: String to scan. ``None`` or an empty string is tolerated.

    Returns:
        The four digits as a string, or ``None`` when nothing matches.
    """
    # Guard against None/empty input so a missing field does not raise
    # TypeError inside re.search.
    if not text:
        return None
    year_match = re.search(r'\b(\d{4})\b', text)
    return year_match.group(1) if year_match else None
|
|
|
|
| 136 |
author_words = data.get('author', '').split()
|
| 137 |
first_author = author_words[0] if author_words else 'Unknown'
|
| 138 |
bibtex_id = f"{first_author}{year}" if year else first_author
|
| 139 |
+
bibtex_id = remove_punctuation(bibtex_id.lower())
|
| 140 |
|
| 141 |
bibtex = f"@{entry_type}{{{bibtex_id},\n"
|
| 142 |
for key, value in data.items():
|
| 143 |
if value.strip():
|
| 144 |
+
if key in ['volume', 'pages', 'year']:
|
| 145 |
+
value = remove_punctuation(value)
|
| 146 |
bibtex += f" {key.lower()} = {{{value.strip()}}},\n"
|
| 147 |
bibtex = bibtex.rstrip(',\n') + "\n}"
|
| 148 |
return bibtex
|
|
|
|
| 216 |
print("BibTeX Entry:")
|
| 217 |
print(bibtex_entry)
|
| 218 |
|
| 219 |
+
return bibtex_entry
|
|
|
|
| 220 |
|
| 221 |
# Create the Mistral chatbot instance
|
| 222 |
mistral_bot = MistralChatBot()
|
|
|
|
| 237 |
gr.HTML("""<h1 style="text-align:center">Reversed Zotero</h1>""")
|
| 238 |
text_input = gr.Textbox(label="Your text", type="text", lines=5)
|
| 239 |
text_button = gr.Button("Extract a structured bibtex")
|
|
|
|
| 240 |
bibtex_output = gr.Textbox(label="BibTeX Entry", lines=10)
|
| 241 |
+
text_button.click(mistral_bot.predict, inputs=text_input, outputs=[bibtex_output])
|
| 242 |
|
| 243 |
if __name__ == "__main__":
|
| 244 |
demo.queue().launch()
|