PapersImpact

Running on Zero

App Files Files Community

openfree committed on Jan 18

Commit

a12abfd

verified ·

1 Parent(s): 3544bdd

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -24

app.py CHANGED Viewed

@@ -6,32 +6,61 @@ import torch.nn.functional as F
 import torch.nn as nn
 import re
 import requests
-import arxiv
 model_path = r'ssocean/NAIP'
-device = 'cuda:0'
 global model, tokenizer
 model = None
 tokenizer = None
 def fetch_arxiv_paper(arxiv_input):
-    """Fetch paper details from arXiv URL or ID."""
     try:
         # Extract arXiv ID from URL or use directly
-        arxiv_id = arxiv_input.split('/')[-1]
-        if 'abs' in arxiv_id:
-            arxiv_id = arxiv_id.split('abs/')[-1]
-        if '.pdf' in arxiv_id:
-            arxiv_id = arxiv_id.replace('.pdf', '')
-        # Search for the paper
-        search = arxiv.Search(id_list=[arxiv_id])
-        paper = next(search.results())
         return {
-            "title": paper.title,
-            "abstract": paper.summary,
             "success": True,
             "message": "Paper fetched successfully!"
         }
@@ -50,10 +79,12 @@ def predict(title, abstract):
     global model, tokenizer
     if model is None:
         model = AutoModelForSequenceClassification.from_pretrained(
-        model_path,
-        num_labels=1,
-        load_in_8bit=True,)
         tokenizer = AutoTokenizer.from_pretrained(model_path)
         model.eval()
     text = f'''Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):'''
     inputs = tokenizer(text, return_tensors="pt").to(device)
@@ -177,6 +208,17 @@ css = """
     border-radius: 1rem;
     margin-top: 2rem;
 }
 """
 with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
@@ -190,12 +232,14 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
     with gr.Row():
         with gr.Column(elem_classes="input-section"):
             # arXiv Input
-            arxiv_input = gr.Textbox(
-                lines=1,
-                placeholder="Enter arXiv URL or ID (e.g., 2006.16236 or https://arxiv.org/abs/2006.16236)",
-                label="📑 arXiv Paper URL/ID"
-            )
-            fetch_button = gr.Button("🔍 Fetch Paper Details", variant="secondary")
             gr.Markdown("### 📝 Or Enter Paper Details Manually")
@@ -213,8 +257,9 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
             submit_button = gr.Button("🎯 Predict Impact", interactive=False, variant="primary")
         with gr.Column(elem_classes="result-section"):
-            score_output = gr.Number(label="🎯 Impact Score")
-            grade_output = gr.Textbox(label="🏆 Grade", value="")
     with gr.Row(elem_classes="methodology-section"):
         gr.Markdown(

 import torch.nn as nn
 import re
 import requests
+from urllib.parse import urlparse
+import xml.etree.ElementTree as ET
 model_path = r'ssocean/NAIP'
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
 global model, tokenizer
 model = None
 tokenizer = None
 def fetch_arxiv_paper(arxiv_input):
+    """Fetch paper details from arXiv URL or ID using requests."""
     try:
         # Extract arXiv ID from URL or use directly
+        if 'arxiv.org' in arxiv_input:
+            parsed = urlparse(arxiv_input)
+            path = parsed.path
+            arxiv_id = path.split('/')[-1].replace('.pdf', '')
+        else:
+            arxiv_id = arxiv_input.strip()
+        # Fetch metadata using arXiv API
+        api_url = f'http://export.arxiv.org/api/query?id_list={arxiv_id}'
+        response = requests.get(api_url)
+        if response.status_code != 200:
+            return {
+                "title": "",
+                "abstract": "",
+                "success": False,
+                "message": "Error fetching paper from arXiv API"
+            }
+        # Parse the response XML
+        root = ET.fromstring(response.text)
+        # ArXiv API uses namespaces
+        ns = {'arxiv': 'http://www.w3.org/2005/Atom'}
+        # Extract title and abstract
+        entry = root.find('.//arxiv:entry', ns)
+        if entry is None:
+            return {
+                "title": "",
+                "abstract": "",
+                "success": False,
+                "message": "Paper not found"
+            }
+        title = entry.find('arxiv:title', ns).text.strip()
+        abstract = entry.find('arxiv:summary', ns).text.strip()
         return {
+            "title": title,
+            "abstract": abstract,
             "success": True,
             "message": "Paper fetched successfully!"
         }
     global model, tokenizer
     if model is None:
         model = AutoModelForSequenceClassification.from_pretrained(
+            model_path,
+            num_labels=1,
+            torch_dtype=torch.float32 if device == 'cpu' else torch.float16
+        )
         tokenizer = AutoTokenizer.from_pretrained(model_path)
+        model.to(device)
         model.eval()
     text = f'''Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):'''
     inputs = tokenizer(text, return_tensors="pt").to(device)
     border-radius: 1rem;
     margin-top: 2rem;
 }
+.grade-display {
+    font-size: 3rem;
+    text-align: center;
+    margin: 1rem 0;
+}
+.arxiv-input {
+    margin-bottom: 1.5rem;
+    padding: 1rem;
+    background: #f3f4f6;
+    border-radius: 0.5rem;
+}
 """
 with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
     with gr.Row():
         with gr.Column(elem_classes="input-section"):
             # arXiv Input
+            with gr.Group(elem_classes="arxiv-input"):
+                gr.Markdown("### 📑 Import from arXiv")
+                arxiv_input = gr.Textbox(
+                    lines=1,
+                    placeholder="Enter arXiv URL or ID (e.g., 2006.16236 or https://arxiv.org/abs/2006.16236)",
+                    label="arXiv Paper URL/ID"
+                )
+                fetch_button = gr.Button("🔍 Fetch Paper Details", variant="secondary")
             gr.Markdown("### 📝 Or Enter Paper Details Manually")
             submit_button = gr.Button("🎯 Predict Impact", interactive=False, variant="primary")
         with gr.Column(elem_classes="result-section"):
+            with gr.Group():
+                score_output = gr.Number(label="🎯 Impact Score")
+                grade_output = gr.Textbox(label="🏆 Grade", value="", elem_classes="grade-display")
     with gr.Row(elem_classes="methodology-section"):
         gr.Markdown(