vikpande committed on
Commit
37ad8f8
·
1 Parent(s): 00031ad

initial commit: research summarizer with FLAN-T5

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py CHANGED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import gradio as gr
3
+ from bs4 import BeautifulSoup
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
+
6
# Lightweight instruction-tuned checkpoint used for summarization. The
# tokenizer and the seq2seq model are loaded from the same checkpoint name.
MODEL_NAME = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Single shared text2text-generation pipeline, built once at import time.
llm_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
)
11
+
12
def _is_arxiv_url(url):
    """Return True when *url* is an http(s) URL on arxiv.org or a subdomain."""
    # Local import so the helper does not rely on a file-level import.
    from urllib.parse import urlparse

    try:
        parts = urlparse(url)
        host = (parts.hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 literal).
        return False
    if parts.scheme not in ("http", "https"):
        return False
    return host == "arxiv.org" or host.endswith(".arxiv.org")


def extract_arxiv_abstract(url, timeout=10):
    """Fetch an arXiv abstract page and return the abstract text.

    Args:
        url: Address of the arXiv abstract page (http or https, on
            arxiv.org or one of its subdomains).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The abstract text with the leading "Abstract:" label removed and
        whitespace collapsed. On failure a human-readable message is returned
        instead: one starting with "Failed to fetch abstract:" when the URL is
        rejected or the request fails, or "Abstract not found on this page.
        Please check the URL." when the page has no abstract block.
    """
    url = str(url or "").strip()

    # The URL comes straight from the UI, so refuse anything that is not an
    # arXiv page before making a request on the user's behalf.
    if not _is_arxiv_url(url):
        return (
            "Failed to fetch abstract: expected an http(s) arxiv.org URL, "
            "e.g. https://arxiv.org/abs/2306.10001"
        )

    try:
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,  # without a timeout a stalled server blocks forever
        )
        # Turn 4xx/5xx responses into errors instead of parsing an error page.
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Failed to fetch abstract: {e}"

    soup = BeautifulSoup(response.text, "html.parser")
    abstract = soup.find("blockquote", class_="abstract")
    if abstract is None:
        return "Abstract not found on this page. Please check the URL."

    # Collapse whitespace without gluing together words that are separated by
    # inline tags, then drop only the leading "Abstract:" label.
    text = " ".join(abstract.get_text().split())
    if text.startswith("Abstract:"):
        text = text[len("Abstract:"):].lstrip()
    if not text:
        return "Abstract not found on this page. Please check the URL."
    return text
24
+
25
def summarize_research_paper(arxiv_url):
    """Fetch the abstract behind *arxiv_url* and summarize it with the LLM.

    Args:
        arxiv_url: URL of an arXiv abstract page, as typed into the UI.

    Returns:
        A display string containing the original abstract followed by the
        generated summary, or the error message from
        ``extract_arxiv_abstract`` when no abstract could be obtained.
    """
    abstract_text = extract_arxiv_abstract(arxiv_url)

    # extract_arxiv_abstract reports problems as plain strings. Both the
    # "Failed ..." and the "Abstract not found ..." messages must be returned
    # as-is rather than being summarized as if they were an abstract.
    if abstract_text.startswith(("Failed", "Abstract not found")):
        return abstract_text
    if not abstract_text.strip():
        # Nothing to summarize; avoid prompting the model with an empty input.
        return "Abstract not found on this page. Please check the URL."

    prompt = f"summarize: {abstract_text}"
    # truncation=True keeps over-long inputs within the tokenizer's limit.
    outputs = llm_pipeline(prompt, max_new_tokens=256, truncation=True)
    summary = outputs[0]["generated_text"]
    return f"📄 Original Abstract:\n{abstract_text}\n\n🧠 Summary:\n{summary}"
33
+
34
# UI: one textbox for the paper URL in, one multi-line textbox for the result.
url_input = gr.Textbox(
    label="arXiv Paper URL",
    placeholder="https://arxiv.org/abs/2306.10001",
)
summary_output = gr.Textbox(label="Summary", lines=15)

demo = gr.Interface(
    fn=summarize_research_paper,
    inputs=url_input,
    outputs=summary_output,
    title="🧠 Research Paper Summarizer",
    description="Summarizes arXiv paper abstracts using FLAN-T5. Works fast on CPU Hugging Face Spaces.",
)

# Start serving the interface as soon as the module is executed.
demo.launch()