Manishkumaryadav committed on
Commit
ab28335
·
verified ·
1 Parent(s): 9c5ba14

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pdfplumber
3
+ import pytesseract
4
+ from PIL import Image
5
+ from transformers import pipeline
6
+ from sentence_transformers import SentenceTransformer, util
7
+
8
# Load the Hugging Face models once at import time so every request reuses them.
# Extractive question answering over the document text.
qa_pipeline = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
# Abstractive summarisation used when the user types "summarize".
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# NOTE(review): `embedder` is not referenced by any code visible in this file;
# kept so the module's public names stay unchanged.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
12
+
13
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Value passed straight to ``pdfplumber.open``.

    Returns:
        A string with each page's text followed by a newline. Pages that
        yield no text contribute only the newline.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() can yield None (e.g. for pages without a text layer
        # in some pdfplumber versions); fall back to "" so the concatenation
        # cannot raise TypeError.
        return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)
20
+
21
# Function to extract text from image using OCR
def extract_text_from_image(image_file):
    """Run OCR on an image and return the recognised text.

    Args:
        image_file: Value passed straight to ``PIL.Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Use the image as a context manager so the underlying file handle is
    # released as soon as OCR finishes instead of lingering until GC.
    with Image.open(image_file) as image:
        return pytesseract.image_to_string(image)
25
+
26
# Function to process document and answer questions
def document_processor(uploaded_file, query):
    """Extract text from an uploaded document and answer a query about it.

    Args:
        uploaded_file: The value delivered by the Gradio file input. It may be
            ``None`` (nothing uploaded), an object exposing ``.name`` (and
            possibly ``.read()``), or a plain path string.
            NOTE(review): the exact type depends on the Gradio version and
            component configuration; confirm against the deployed version.
        query: The user's question, or the word "summarize" (any case,
            surrounding whitespace ignored) to request a summary.

    Returns:
        The summary text, the extracted answer, or a short user-facing
        message when the input cannot be processed.
    """
    if uploaded_file is None:
        return "Please upload a document first."

    # Accept both file-like uploads (with .name) and bare path strings.
    path = getattr(uploaded_file, "name", uploaded_file)
    lowered_path = str(path).lower()  # extensions are matched case-insensitively

    if lowered_path.endswith(".pdf"):
        text = extract_text_from_pdf(path)
    elif lowered_path.endswith((".png", ".jpg", ".jpeg")):
        text = extract_text_from_image(path)
    elif hasattr(uploaded_file, "read"):
        raw = uploaded_file.read()
        # A binary handle yields bytes; a text handle already yields str.
        text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
    else:
        with open(path, encoding="utf-8") as handle:
            text = handle.read()

    # Both models raise on empty input, so answer with a message instead.
    if not text or not text.strip():
        return "No readable text was found in the document."

    question = (query or "").strip()
    if not question:
        return "Please enter a question (or type 'summarize')."

    if question.lower() == "summarize":
        # truncation=True keeps long documents within the model's input limit
        # instead of failing inside the pipeline.
        summary = summarizer(
            text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]

    # Find the best-matching answer
    answer = qa_pipeline(question=question, context=text)
    return answer["answer"]
43
+
44
# Gradio UI: one row with the file input and the query box, an output box
# below it, and a button that sends both inputs to document_processor.
with gr.Blocks() as app:
    # Title emoji restored from mojibake to the intended page emoji.
    gr.Markdown("# 📄 Smart Document Explorer")

    with gr.Row():
        uploaded_file = gr.File(label="Upload Document (PDF, Image, or Text)")
        query = gr.Textbox(
            label="Ask a question (or type 'summarize')",
            placeholder="What is this document about?",
        )

    output_text = gr.Textbox(label="AI Response")

    submit_btn = gr.Button("Process Document")
    submit_btn.click(
        document_processor,
        inputs=[uploaded_file, query],
        outputs=output_text,
    )

# Only start the server when run as a script, so importing this module
# (e.g. for testing) does not block on launch().
if __name__ == "__main__":
    app.launch()