File size: 1,668 Bytes
37ad8f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import requests
import gradio as gr
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Lightweight model for instruction-tuned summarization
# google/flan-t5-base (~250M params) is small enough to run on CPU-only
# Hugging Face Spaces; both tokenizer and weights are downloaded/cached
# at import time, so the first startup is slow but later runs are fast.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")

# text2text-generation is the task for seq2seq (encoder-decoder) models like T5;
# this module-level pipeline is shared by every request handled by the app.
llm_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

def extract_arxiv_abstract(url):
    """Fetch an arXiv abstract page and return the abstract text.

    Args:
        url: An arXiv abstract URL, e.g. "https://arxiv.org/abs/2306.10001".

    Returns:
        The abstract text on success, or a human-readable error message
        starting with "Failed" / "Abstract not found" otherwise (this
        function never raises; errors are reported as strings for the UI).
    """
    try:
        # A browser-like User-Agent avoids being served a bot-block page.
        headers = {"User-Agent": "Mozilla/5.0"}
        # timeout prevents the request from hanging the app indefinitely
        # on a stalled connection; raise_for_status turns HTTP error pages
        # (404, 500, ...) into a reported fetch failure instead of being
        # silently parsed as "Abstract not found".
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # arXiv abstract pages render the abstract in
        # <blockquote class="abstract">Abstract: ...</blockquote>
        abstract = soup.find("blockquote", class_="abstract")
        if abstract:
            return abstract.get_text(strip=True).replace("Abstract:", "")
        else:
            return "Abstract not found on this page. Please check the URL."
    except Exception as e:
        return f"Failed to fetch abstract: {str(e)}"

def summarize_research_paper(arxiv_url):
    """Fetch an arXiv abstract and return it alongside an LLM summary.

    Args:
        arxiv_url: An arXiv abstract URL entered by the user.

    Returns:
        A formatted string with the original abstract and its summary,
        or the error message from extract_arxiv_abstract verbatim.
    """
    abstract_text = extract_arxiv_abstract(arxiv_url)
    # extract_arxiv_abstract signals errors via message strings; check BOTH
    # of its error prefixes so "Abstract not found..." is surfaced to the
    # user rather than being sent to the model and "summarized".
    if abstract_text.startswith(("Failed", "Abstract not found")):
        return abstract_text

    # "summarize:" is a task prefix FLAN-T5 was instruction-tuned on.
    prompt = f"summarize: {abstract_text}"
    summary = llm_pipeline(prompt, max_new_tokens=256)[0]['generated_text']
    return f"๐Ÿ“„ Original Abstract:\n{abstract_text}\n\n๐Ÿง  Summary:\n{summary}"

# Build the web UI: a single URL textbox in, the formatted summary out.
demo = gr.Interface(
    fn=summarize_research_paper,
    inputs=gr.Textbox(
        label="arXiv Paper URL",
        placeholder="https://arxiv.org/abs/2306.10001",
    ),
    outputs=gr.Textbox(label="Summary", lines=15),
    title="๐Ÿง  Research Paper Summarizer",
    description="Summarizes arXiv paper abstracts using FLAN-T5. Works fast on CPU Hugging Face Spaces.",
)

# Serve the interface (blocks until the app is stopped).
demo.launch()