Spaces:

omvishesh
/

ResearchpaperSummarizer

Sleeping

App Files Files Community

omvishesh committed on May 20

Commit

9e7576b

verified ·

1 Parent(s): eca5dfd

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -79

app.py CHANGED Viewed

@@ -1,80 +1,80 @@
-import gradio as gr
-import easyocr
-import cv2
-import numpy as np
-from PIL import Image
-import pdf2image
-import tempfile
-from langchain_groq import ChatGroq
-import re
-# Initialize OCR reader
-reader = easyocr.Reader(['en'])
-# Initialize LLM
-llm = ChatGroq(
-    temperature=0,
-    groq_api_key="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",
-    model_name="llama-3.3-70b-versatile"
-)
-# Utility to clean up unwanted characters
-def clean_text(text):
-    text = re.sub(r"[*•●▪️✦➡️~]+", "", text)  # remove bullet points and asterisks
-    text = re.sub(r"\s{2,}", " ", text)  # remove excessive spacing
-    return text.strip()
-def extract_text_and_summarize(file):
-    file_path = file.name
-    # If it's a PDF, convert to image
-    if file_path.lower().endswith(".pdf"):
-        images = pdf2image.convert_from_path(file_path)
-        image = np.array(images[0])
-    else:
-        image = cv2.imread(file_path)
-    # OCR
-    results = reader.readtext(image)
-    extracted_text = ' '.join([text[1] for text in results])
-    extracted_text = clean_text(extracted_text)
-    if not extracted_text.strip():
-        return "No readable text found.", ""
-    # LLM summarization
-    messages = [
-        {"role": "system", "content": "Your job is to summarize the given research paper and list its key sub-domains and topics clearly."},
-        {"role": "user", "content": extracted_text}
-    ]
-    result = llm.invoke(messages)
-    summarized_text = clean_text(result.content)
-    return extracted_text, summarized_text
-# Build Gradio UI
-with gr.Blocks(title="Research Paper Summarizer") as iface:
-    gr.Markdown(
-        """
-        # 🧠 Research Paper Summarizer
-        Upload an **image** or **PDF** of a research paper. This app will:
-        - Extract text using OCR
-        - Summarize the content
-        - List key subdomains and research topics
-        ⚡ Powered by EasyOCR & LLaMA-3 via Groq
-        """
-    )
-    with gr.Row():
-        file_input = gr.File(label="📄 Upload Research Paper (Image or PDF)", file_types=[".png", ".jpg", ".jpeg", ".pdf"])
-    with gr.Row():
-        extracted_box = gr.Textbox(label="🔍 Extracted Text", lines=10, interactive=False)
-        summary_box = gr.Textbox(label="📌 Summarized Topics & Subdomains", lines=10, interactive=False)
-    run_button = gr.Button("🔁 Run Summarizer")
-    run_button.click(fn=extract_text_and_summarize, inputs=file_input, outputs=[extracted_box, summary_box])
 iface.launch(share=True)

+import os
+import re
+import tempfile
+
+import cv2
+import easyocr
+import gradio as gr
+import numpy as np
+import pdf2image
+from langchain_groq import ChatGroq
+from PIL import Image
+# Initialize OCR reader (English only). Built once at module load and shared
+# by every call to extract_text_and_summarize.
+reader = easyocr.Reader(['en'])
+# Initialize LLM. The API key is taken from the GROQ_API_KEY environment
+# variable: the name `groq_api_key` was previously never defined, which made
+# the module fail with NameError at import, and a literal key must not be
+# committed to the repository.
+groq_api_key = os.environ.get("GROQ_API_KEY")
+if not groq_api_key:
+    raise RuntimeError(
+        "GROQ_API_KEY environment variable is not set; "
+        "configure it before starting the app."
+    )
+llm = ChatGroq(
+    temperature=0,
+    groq_api_key=groq_api_key,
+    model_name="llama-3.3-70b-versatile",
+)
+# Text normalisation helper shared by the OCR output and the LLM response
+def clean_text(text):
+    """Return *text* with decorative marks removed and spacing tightened.
+
+    Asterisks, tildes and the bullet/arrow glyphs in the character class are
+    deleted, every run of two or more whitespace characters is replaced by a
+    single space, and leading/trailing whitespace is trimmed. A lone
+    whitespace character (e.g. one newline) is left untouched.
+    """
+    without_marks = re.sub(r"[*•●▪️✦➡️~]+", "", text)
+    tightened = re.sub(r"\s{2,}", " ", without_marks)
+    return tightened.strip()
+def extract_text_and_summarize(file):
+    """OCR an uploaded image or PDF and summarize the text with the LLM.
+
+    Args:
+        file: The value delivered by the Gradio ``File`` input. Either an
+            object exposing the on-disk path as ``.name`` or a plain path
+            string is accepted; ``None`` is tolerated.
+
+    Returns:
+        A ``(extracted_text, summarized_text)`` tuple. When there is nothing
+        to process (no file, unreadable image, empty PDF, or no text found)
+        the first element is a short message and the second is ``""``.
+    """
+    # Presumably what the click handler receives when no file was selected.
+    if file is None:
+        return "No file uploaded.", ""
+    # Accept both upload objects carrying `.name` and bare path strings.
+    file_path = getattr(file, "name", file)
+    # If it's a PDF, convert to image; only the first page is OCR'd.
+    if file_path.lower().endswith(".pdf"):
+        images = pdf2image.convert_from_path(file_path)
+        if not images:
+            return "No readable text found.", ""
+        image = np.array(images[0])
+    else:
+        # cv2.imread reports failure by returning None rather than raising.
+        image = cv2.imread(file_path)
+        if image is None:
+            return "Could not read the uploaded image.", ""
+    # OCR: the recognized string sits at index 1 of each result entry.
+    results = reader.readtext(image)
+    extracted_text = clean_text(' '.join(item[1] for item in results))
+    if not extracted_text.strip():
+        return "No readable text found.", ""
+    # LLM summarization
+    messages = [
+        {"role": "system", "content": "Your job is to summarize the given research paper and list its key sub-domains and topics clearly."},
+        {"role": "user", "content": extracted_text},
+    ]
+    result = llm.invoke(messages)
+    summarized_text = clean_text(result.content)
+    return extracted_text, summarized_text
+# Gradio front end: header text, one upload row, one results row, one button
+with gr.Blocks(title="Research Paper Summarizer") as iface:
+    # Static description rendered at the top of the page.
+    gr.Markdown(
+        """
+        # 🧠 Research Paper Summarizer
+        Upload an **image** or **PDF** of a research paper. This app will:
+        - Extract text using OCR
+        - Summarize the content
+        - List key subdomains and research topics
+        ⚡ Powered by EasyOCR & LLaMA-3 via Groq
+        """
+    )
+    # Upload widget, restricted to the listed image and PDF extensions.
+    with gr.Row():
+        file_input = gr.File(
+            label="📄 Upload Research Paper (Image or PDF)",
+            file_types=[".png", ".jpg", ".jpeg", ".pdf"],
+        )
+    # Read-only output panes: OCR text on the left, summary on the right.
+    with gr.Row():
+        extracted_box = gr.Textbox(
+            label="🔍 Extracted Text",
+            lines=10,
+            interactive=False,
+        )
+        summary_box = gr.Textbox(
+            label="📌 Summarized Topics & Subdomains",
+            lines=10,
+            interactive=False,
+        )
+    # Clicking the button feeds the upload to extract_text_and_summarize and
+    # writes its two return values into the two text boxes.
+    run_button = gr.Button("🔁 Run Summarizer")
+    run_button.click(
+        fn=extract_text_and_summarize,
+        inputs=file_input,
+        outputs=[extracted_box, summary_box],
+    )
 iface.launch(share=True)