Spaces:
Runtime error
Runtime error
initial commit: research summarizer with FLAN-T5
Browse files
app.py
CHANGED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import gradio as gr
|
3 |
+
from bs4 import BeautifulSoup
|
4 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
5 |
+
|
6 |
+
# Lightweight instruction-tuned checkpoint used for summarization.
# The tokenizer and the model are loaded from the same checkpoint name.
MODEL_ID = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

# Shared text2text-generation pipeline; built once at import time and reused
# for every request.
llm_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
)
|
11 |
+
|
12 |
+
def extract_arxiv_abstract(url):
|
13 |
+
try:
|
14 |
+
headers = {"User-Agent": "Mozilla/5.0"}
|
15 |
+
response = requests.get(url, headers=headers)
|
16 |
+
soup = BeautifulSoup(response.text, "html.parser")
|
17 |
+
abstract = soup.find("blockquote", class_="abstract")
|
18 |
+
if abstract:
|
19 |
+
return abstract.get_text(strip=True).replace("Abstract:", "")
|
20 |
+
else:
|
21 |
+
return "Abstract not found on this page. Please check the URL."
|
22 |
+
except Exception as e:
|
23 |
+
return f"Failed to fetch abstract: {str(e)}"
|
24 |
+
|
25 |
+
def summarize_research_paper(arxiv_url):
    """Summarize the abstract of the arXiv paper at *arxiv_url*.

    The abstract is fetched with ``extract_arxiv_abstract`` and passed to the
    module-level ``llm_pipeline`` with a "summarize: " prompt prefix.

    Args:
        arxiv_url: URL of the arXiv abstract page, as typed by the user.

    Returns:
        str: A report containing the original abstract followed by the
        generated summary, or the fetcher's diagnostic message when no
        abstract could be obtained.
    """
    if not arxiv_url or not arxiv_url.strip():
        return "Failed to fetch abstract: no URL provided."

    abstract_text = extract_arxiv_abstract(arxiv_url.strip())

    # The fetcher reports problems in-band as strings. Both the "Failed ..."
    # and the "Abstract not found ..." messages are diagnostics and must not
    # be sent to the model as if they were an abstract.
    # NOTE(review): an abstract that itself begins with one of these words
    # would be misclassified; the in-band signalling cannot distinguish them.
    if abstract_text.startswith(("Failed", "Abstract not found")):
        return abstract_text

    prompt = f"summarize: {abstract_text}"
    generations = llm_pipeline(prompt, max_new_tokens=256)
    summary = generations[0]['generated_text']
    return f"📄 Original Abstract:\n{abstract_text}\n\n🧠 Summary:\n{summary}"
|
33 |
+
|
34 |
+
# Gradio UI: one textbox for the arXiv URL, one multi-line textbox for the
# combined abstract + summary report.
url_input = gr.Textbox(
    label="arXiv Paper URL",
    placeholder="https://arxiv.org/abs/2306.10001",
)
summary_output = gr.Textbox(label="Summary", lines=15)

demo = gr.Interface(
    fn=summarize_research_paper,
    inputs=url_input,
    outputs=summary_output,
    title="🧠 Research Paper Summarizer",
    description="Summarizes arXiv paper abstracts using FLAN-T5. Works fast on CPU Hugging Face Spaces.",
)

# Launch as soon as the module is executed, as the original script does.
demo.launch()
|