ilovetensor committed on
Commit
20e9c3c
·
verified ·
1 Parent(s): f7985cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py CHANGED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
3
+ import torch
4
+
5
# Pick the compute device: use the GPU when torch can see one, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Single checkpoint id shared by the model and its processor.
_QWEN_VL_CHECKPOINT = "Qwen/Qwen2-VL-2B-Instruct"

# Load the Qwen2-VL vision-language model in bfloat16, move it to the
# selected device, and switch to inference mode.
model = (
    Qwen2VLForConditionalGeneration.from_pretrained(
        _QWEN_VL_CHECKPOINT,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
    )
    .to(device)
    .eval()
)

# Companion processor handling image preprocessing and tokenization.
processor = AutoProcessor.from_pretrained(
    _QWEN_VL_CHECKPOINT,
    trust_remote_code=True,
)
20
+
21
def extract_text(image):
    """Extract all visible text (Hindi and English) from a PIL image via Qwen2-VL.

    Builds a chat-formatted prompt containing the image placeholder, runs
    generation, and returns only the newly generated tokens as a string.
    """
    prompt = "Please extract all the text from the image, including any text in Hindi and English."
    # Qwen2-VL expects a chat-template prompt with an image placeholder;
    # passing the raw instruction text omits the required image tokens.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ],
        }
    ]
    text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(images=[image], text=[text_prompt], return_tensors="pt").to(device)
    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=500)
    # generate() returns prompt + completion; slice off the prompt tokens so
    # the echoed instruction is not included in the OCR result.
    generated_ids = outputs[:, inputs.input_ids.shape[1]:]
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
31
+
32
def search_text(extracted_text, keyword):
    """Highlight every case-insensitive occurrence of *keyword* in *extracted_text*.

    Each match is wrapped in <mark>...</mark> HTML tags for display in a
    gr.HTML component; the matched text keeps its original casing. The text
    is returned unchanged when the keyword is empty.
    """
    import re

    # An empty keyword would compile to a pattern matching at every position,
    # inserting tags between every character — treat it as "no search".
    if not keyword:
        return extracted_text

    # re.escape so the keyword is searched literally (e.g. "." is not a wildcard).
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    # re.sub does the position bookkeeping; m.group(0) preserves original casing.
    return pattern.sub(lambda m: f"<mark>{m.group(0)}</mark>", extracted_text)
46
+
47
# Gradio UI: upload an image, extract its text with Qwen2-VL, then search the
# extracted text for a keyword with <mark> highlighting.
with gr.Blocks() as demo:
    gr.Markdown("# OCR and Keyword Search Web Application Prototype")
    with gr.Row():
        image_input = gr.Image(type='pil', label="Upload an image containing text in Hindi and English")
        extract_button = gr.Button("Extract Text")
        extracted_text_output = gr.Textbox(label="Extracted Text", lines=10)
    with gr.Row():
        keyword_input = gr.Textbox(label="Enter keyword to search within the extracted text")
        search_button = gr.Button("Search")
        search_results_output = gr.HTML(label="Search Results")

    # Per-session store for the extracted text. gr.State must be wired through
    # event inputs/outputs — assigning .value from inside a handler does not
    # persist per-session state and breaks with concurrent users.
    extracted_text_state = gr.State("")

    def extract_and_display(image):
        """Run OCR and fan the result out to both the textbox and the state."""
        extracted_text = extract_text(image)
        return extracted_text, extracted_text

    def search_and_display(keyword, extracted_text):
        """Search within the extracted text received from the session state."""
        if not extracted_text:
            return "No extracted text available. Please upload an image and extract text first."
        return search_text(extracted_text, keyword)

    # Extraction updates both the visible textbox and the session state.
    extract_button.click(
        fn=extract_and_display,
        inputs=image_input,
        outputs=[extracted_text_output, extracted_text_state],
    )

    # Search reads the session state as a regular input.
    search_button.click(
        fn=search_and_display,
        inputs=[keyword_input, extracted_text_state],
        outputs=search_results_output,
    )

# Launch the Gradio app
demo.launch()