Spaces:

omvishesh
/

ResearchpaperSummarizer

Sleeping

App Files Files Community

omvishesh committed on May 19

Commit

7e76a37

verified ·

1 Parent(s): 9d61546

Upload 2 files

Browse files

Files changed (2) hide show

app.py +80 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import os
+import re
+import tempfile
+
+import cv2
+import easyocr
+import gradio as gr
+import numpy as np
+import pdf2image
+from langchain_groq import ChatGroq
+from PIL import Image
+# Initialize the OCR reader once at import time (English only); the request
+# handler below reuses this single instance.
+reader = easyocr.Reader(['en'])
+# Initialize the LLM client.
+# The Groq API key is read from the GROQ_API_KEY environment variable instead
+# of being hard-coded: a key committed to a repository is readable by anyone
+# who can see that repository.
+# NOTE(review): the key that was previously committed here should be revoked.
+_groq_api_key = os.environ.get("GROQ_API_KEY")
+if not _groq_api_key:
+    raise RuntimeError(
+        "GROQ_API_KEY environment variable is not set; "
+        "configure it before starting the app."
+    )
+llm = ChatGroq(
+    temperature=0,
+    groq_api_key=_groq_api_key,
+    model_name="llama-3.3-70b-versatile",
+)
+# Text clean-up helper shared by the OCR output and the LLM summary
+def clean_text(text):
+    """Return *text* without bullet glyphs and with whitespace runs collapsed.
+
+    Every run of the characters in the bullet class (asterisks, tildes and
+    several bullet/arrow glyphs) is deleted, each run of two or more
+    whitespace characters becomes a single space, and leading/trailing
+    whitespace is stripped. A single whitespace character (including a lone
+    newline) is left untouched.
+    """
+    bullet_runs = re.compile(r"[*•●▪️✦➡️~]+")
+    wide_gaps = re.compile(r"\s{2,}")
+    without_bullets = bullet_runs.sub("", text)
+    return wide_gaps.sub(" ", without_bullets).strip()
+def extract_text_and_summarize(file):
+    """OCR the uploaded image or PDF and summarize the text with the LLM.
+
+    Args:
+        file: The value Gradio passes for the upload: an object whose
+            ``name`` attribute is the path of the uploaded file, a plain
+            path string, or ``None`` when nothing was uploaded.
+
+    Returns:
+        A ``(extracted_text, summary)`` tuple of strings. When the input
+        cannot be processed, the first element is a short message
+        explaining why and the second element is an empty string.
+    """
+    if file is None:
+        return "Please upload an image or PDF file first.", ""
+    # Accept both a file-like object exposing ``.name`` and a bare path
+    # string (which form arrives presumably depends on the Gradio version).
+    file_path = getattr(file, "name", file)
+    if file_path.lower().endswith(".pdf"):
+        # Only the first page is summarized, so rasterize just that page
+        # instead of converting the whole document.
+        pages = pdf2image.convert_from_path(file_path, first_page=1, last_page=1)
+        if not pages:
+            return "No readable text found.", ""
+        # pdf2image yields PIL images (RGB) while cv2.imread yields BGR;
+        # convert so both branches hand the OCR the same channel order.
+        image = cv2.cvtColor(np.array(pages[0].convert("RGB")), cv2.COLOR_RGB2BGR)
+    else:
+        image = cv2.imread(file_path)
+        if image is None:
+            # cv2.imread reports an unreadable file by returning None
+            # rather than raising.
+            return "Could not read the uploaded image.", ""
+    # OCR: element 1 of each result is used as the recognized string.
+    detections = reader.readtext(image)
+    extracted_text = clean_text(' '.join(item[1] for item in detections))
+    if not extracted_text:
+        return "No readable text found.", ""
+    # LLM summarization
+    messages = [
+        {"role": "system", "content": "Your job is to summarize the given research paper and list its key sub-domains and topics clearly."},
+        {"role": "user", "content": extracted_text},
+    ]
+    result = llm.invoke(messages)
+    summarized_text = clean_text(result.content)
+    return extracted_text, summarized_text
+# Build the Gradio UI: one upload widget, two read-only output boxes, and a
+# button that runs extract_text_and_summarize on the uploaded file.
+with gr.Blocks(title="Research Paper Summarizer") as iface:
+    # The blank line before the "Powered by" line keeps Markdown from folding
+    # it into the last bullet of the list above it.
+    gr.Markdown(
+        """
+        # 🧠 Research Paper Summarizer
+        Upload an **image** or **PDF** of a research paper. This app will:
+        - Extract text using OCR
+        - Summarize the content
+        - List key subdomains and research topics
+
+        ⚡ Powered by EasyOCR & LLaMA-3 via Groq
+        """
+    )
+    with gr.Row():
+        file_input = gr.File(label="📄 Upload Research Paper (Image or PDF)", file_types=[".png", ".jpg", ".jpeg", ".pdf"])
+    with gr.Row():
+        extracted_box = gr.Textbox(label="🔍 Extracted Text", lines=10, interactive=False)
+        summary_box = gr.Textbox(label="📌 Summarized Topics & Subdomains", lines=10, interactive=False)
+    run_button = gr.Button("🔁 Run Summarizer")
+    # Outputs are filled in order: extracted text first, summary second.
+    run_button.click(fn=extract_text_and_summarize, inputs=file_input, outputs=[extracted_box, summary_box])
+# Start the server only when this file is run as a script, so importing the
+# module (e.g. from a test) does not launch the app as a side effect.
+if __name__ == "__main__":
+    # share=True was dropped: it is not supported on Hugging Face Spaces, and
+    # elsewhere it opens a public tunnel to an app that spends the owner's
+    # LLM API quota. Pass share=True explicitly for a temporary public link.
+    iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+# Third-party packages, listed by their PyPI distribution names (which differ
+# from the import names for cv2 and PIL).
+gradio
+easyocr
+# provides the "cv2" module; headless build, no GUI libraries needed on a server
+opencv-python-headless
+numpy
+# provides the "PIL" module
+Pillow
+# needs the poppler command-line tools installed on the system
+pdf2image
+langchain-groq
+# "tempfile" and "re" are part of the Python standard library and must not be
+# listed as installable requirements.