Spaces:

omvishesh
/

ResearchpaperSummarizer

Sleeping

App Files Files Community

ResearchpaperSummarizer / app.py

omvishesh

Update app.py

e4ff482 verified about 1 month ago

raw

history blame

2.48 kB

	import gradio as gr
	import easyocr
	import cv2
	import numpy as np
	from PIL import Image
	import pdf2image
	import tempfile
	from langchain_groq import ChatGroq
	import re
	import os

	# OCR engine shared by all requests; it is constructed once at import time
	# with the language list ['en'].
	reader = easyocr.Reader(["en"])

	# Chat model used for summarization. The API key is looked up in the
	# GROQ_API_KEY environment variable (None is passed if it is unset) and the
	# temperature is pinned to 0.
	llm = ChatGroq(
	    model_name="llama-3.3-70b-versatile",
	    groq_api_key=os.environ.get("GROQ_API_KEY"),
	    temperature=0,
	)

	def clean_text(text):
	    """Strip decoration glyphs from *text* and collapse whitespace runs.

	    Two substitutions are applied in order:

	    1. Every run of bullet/decoration characters (asterisks, bullet
	       glyphs, arrows, tildes) is deleted.
	    2. Every run of two or more whitespace characters, newlines included,
	       is replaced by a single space. A lone whitespace character is left
	       as it is.

	    Leading and trailing whitespace is then trimmed from the result.

	    Args:
	        text: The string to clean.

	    Returns:
	        The cleaned string.
	    """
	    # (pattern, replacement) pairs; glyph removal runs first so that the
	    # gaps it leaves behind are collapsed by the whitespace rule.
	    substitutions = (
	        (r"[*•●▪️✦➡️~]+", ""),
	        (r"\s{2,}", " "),
	    )
	    cleaned = text
	    for pattern, replacement in substitutions:
	        cleaned = re.sub(pattern, replacement, cleaned)
	    return cleaned.strip()

	def extract_text_and_summarize(file):
	    """OCR an uploaded image or PDF and summarize the text with the LLM.

	    For a PDF only the first page is rasterized and read; any other file is
	    loaded as an image with OpenCV.

	    Args:
	        file: The upload handed over by the file input. Either an object
	            exposing the on-disk path as ``.name`` or a plain path string;
	            ``None`` when nothing was uploaded.

	    Returns:
	        A ``(extracted_text, summary)`` tuple of strings. When the input
	        cannot be processed (no upload, unreadable image, empty PDF, no
	        text recognized), the first element carries a short explanation
	        and the second is an empty string.
	    """
	    if file is None:
	        return "No file uploaded.", ""

	    # Accept both upload objects (path in ``.name``) and bare path strings.
	    file_path = str(getattr(file, "name", file))

	    if file_path.lower().endswith(".pdf"):
	        # Only page 1 is used, so restrict the conversion to that page
	        # instead of rasterizing the whole document.
	        pages = pdf2image.convert_from_path(
	            file_path, first_page=1, last_page=1
	        )
	        if not pages:
	            return "The PDF contains no pages.", ""
	        image = np.array(pages[0])
	    else:
	        image = cv2.imread(file_path)
	        # cv2.imread reports failure by returning None rather than raising.
	        if image is None:
	            return "Could not read the uploaded image.", ""

	    # Each OCR result carries the recognized string at index 1.
	    ocr_results = reader.readtext(image)
	    extracted_text = clean_text(" ".join(entry[1] for entry in ocr_results))

	    if not extracted_text.strip():
	        return "No readable text found.", ""

	    # LLM summarization
	    messages = [
	        {"role": "system", "content": "Your job is to summarize the given research paper and list its key sub-domains and topics clearly."},
	        {"role": "user", "content": extracted_text},
	    ]
	    response = llm.invoke(messages)

	    return extracted_text, clean_text(response.content)

	# Assemble the Gradio interface: an intro panel, the upload widget, two
	# read-only result boxes, and a button wired to extract_text_and_summarize.
	with gr.Blocks(title="Research Paper Summarizer") as iface:
	    gr.Markdown(
	        """
	# 🧠 Research Paper Summarizer
	Upload an image or PDF of a research paper. This app will:
	- Extract text using OCR
	- Summarize the content
	- List key subdomains and research topics

	⚡ Powered by EasyOCR & LLaMA-3 via Groq
	"""
	    )

	    # Upload row: accepts common image formats and PDFs.
	    with gr.Row():
	        file_input = gr.File(
	            label="📄 Upload Research Paper (Image or PDF)",
	            file_types=[".png", ".jpg", ".jpeg", ".pdf"],
	        )

	    # Result row: raw OCR text next to the LLM summary.
	    with gr.Row():
	        extracted_box = gr.Textbox(
	            label="🔍 Extracted Text",
	            lines=10,
	            interactive=False,
	        )
	        summary_box = gr.Textbox(
	            label="📌 Summarized Topics & Subdomains",
	            lines=10,
	            interactive=False,
	        )

	    run_button = gr.Button("🔁 Run Summarizer")

	    # The handler's two return values fill the two boxes in this order.
	    run_button.click(
	        fn=extract_text_and_summarize,
	        inputs=file_input,
	        outputs=[extracted_box, summary_box],
	    )

	# NOTE(review): share=True asks Gradio for a public share link; confirm it is
	# still wanted when the app runs on a hosted platform.
	iface.launch(share=True)