Spaces:

omvishesh
/

ResearchpaperSummarizer

Sleeping

App Files Files Community

ResearchpaperSummarizer / app.py

omvishesh

Update app.py

e4ff482 verified about 1 month ago

raw

history blame contribute delete

2.48 kB

	import gradio as gr
	import easyocr
	import cv2
	import numpy as np
	from PIL import Image
	import pdf2image
	import tempfile
	from langchain_groq import ChatGroq
	import re
	import os

	# OCR engine shared by every request; built once at import time.
	OCR_LANGUAGES = ['en']
	reader = easyocr.Reader(OCR_LANGUAGES)

	# Groq-hosted chat model used for summarization. The API key is read from
	# the GROQ_API_KEY environment variable (os.getenv yields None when unset).
	llm = ChatGroq(
	    model_name="llama-3.3-70b-versatile",
	    groq_api_key=os.getenv("GROQ_API_KEY"),
	    temperature=0,
	)

	# Text normalization helper used on both the OCR output and the LLM reply.
	def clean_text(text):
	    """Strip decorative marker characters and squeeze whitespace runs.

	    Every occurrence of the bullet/asterisk/arrow/tilde characters listed in
	    the first pattern is deleted, any run of two or more whitespace
	    characters becomes a single space, and the result is trimmed at both
	    ends. A lone whitespace character (e.g. one newline) is left as-is.
	    """
	    marker_pattern = re.compile(r"[*•●▪️✦➡️~]+")
	    spacing_pattern = re.compile(r"\s{2,}")

	    without_markers = marker_pattern.sub("", text)
	    single_spaced = spacing_pattern.sub(" ", without_markers)
	    return single_spaced.strip()

	def _load_first_page_image(file_path):
	    """Load the upload at *file_path* as an image array, or None if unreadable.

	    Paths ending in ".pdf" (case-insensitive) are rasterized with pdf2image
	    and only the first page is kept; any other path is read with cv2.imread.
	    """
	    if file_path.lower().endswith(".pdf"):
	        # Only page 1 is ever used, so ask pdf2image to render just that
	        # page instead of rasterizing the whole document.
	        pages = pdf2image.convert_from_path(file_path, first_page=1, last_page=1)
	        return np.array(pages[0]) if pages else None
	    # cv2.imread reports an unreadable file by returning None, not by raising.
	    return cv2.imread(file_path)


	def extract_text_and_summarize(file):
	    """Run OCR on an uploaded image or PDF and summarize the text with the LLM.

	    Args:
	        file: The value delivered by the file input. Either an object that
	            exposes the on-disk path as ``.name`` or a plain path string;
	            ``None`` when nothing has been uploaded.

	    Returns:
	        A ``(extracted_text, summary)`` tuple of strings. When there is
	        nothing to summarize (no upload, unreadable file, or no text
	        recognized) the first element is a short status message and the
	        second is an empty string.
	    """
	    # Clicking the button without an upload passes None; report it instead
	    # of failing on attribute access.
	    if file is None:
	        return "No file uploaded.", ""

	    # Accept both file-like wrappers (path in .name) and bare path strings.
	    file_path = getattr(file, "name", file)

	    image = _load_first_page_image(file_path)
	    if image is None:
	        return "Could not read the uploaded file.", ""

	    # OCR: element 1 of each result entry is the recognized string.
	    results = reader.readtext(image)
	    extracted_text = clean_text(' '.join(entry[1] for entry in results))

	    if not extracted_text:
	        return "No readable text found.", ""

	    # LLM summarization
	    messages = [
	        {"role": "system", "content": "Your job is to summarize the given research paper and list its key sub-domains and topics clearly."},
	        {"role": "user", "content": extracted_text},
	    ]
	    result = llm.invoke(messages)

	    summarized_text = clean_text(result.content)
	    return extracted_text, summarized_text

	# Gradio front end: one upload widget, two read-only result boxes, and a
	# button that feeds the upload through extract_text_and_summarize.
	_INTRO_MARKDOWN = """
	# 🧠 Research Paper Summarizer
	Upload an image or PDF of a research paper. This app will:
	- Extract text using OCR
	- Summarize the content
	- List key subdomains and research topics

	⚡ Powered by EasyOCR & LLaMA-3 via Groq
	"""

	with gr.Blocks(title="Research Paper Summarizer") as iface:
	    gr.Markdown(_INTRO_MARKDOWN)

	    # Input row: restricts the picker to the supported image/PDF extensions.
	    with gr.Row():
	        file_input = gr.File(
	            label="📄 Upload Research Paper (Image or PDF)",
	            file_types=[".png", ".jpg", ".jpeg", ".pdf"],
	        )

	    # Output row: OCR text on the left, LLM summary on the right.
	    with gr.Row():
	        extracted_box = gr.Textbox(
	            label="🔍 Extracted Text",
	            lines=10,
	            interactive=False,
	        )
	        summary_box = gr.Textbox(
	            label="📌 Summarized Topics & Subdomains",
	            lines=10,
	            interactive=False,
	        )

	    run_button = gr.Button("🔁 Run Summarizer")

	    # The handler's two return values map onto the two boxes, in order.
	    run_button.click(
	        fn=extract_text_and_summarize,
	        inputs=file_input,
	        outputs=[extracted_box, summary_box],
	    )

	iface.launch(share=True)