ilovetensor committed on
Commit
20e9c3c
·
verified ·
1 Parent(s): f7985cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py CHANGED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
3
+ import torch
4
+
5
# Pick the compute device: use the GPU when torch can see one, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Single checkpoint id shared by the model and its processor.
_QWEN_VL_CHECKPOINT = "Qwen/Qwen2-VL-2B-Instruct"

# Load the Qwen2-VL vision-language model in bfloat16, move it to the
# selected device, and switch to inference mode.
model = (
    Qwen2VLForConditionalGeneration.from_pretrained(
        _QWEN_VL_CHECKPOINT,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
    )
    .to(device)
    .eval()
)

# Companion processor handling image preprocessing and tokenization.
processor = AutoProcessor.from_pretrained(
    _QWEN_VL_CHECKPOINT,
    trust_remote_code=True,
)
20
+
21
def extract_text(image):
    """Extract all visible text (Hindi and English) from a PIL image via Qwen2-VL.

    Builds a chat-formatted prompt containing the image placeholder, runs
    generation, and returns only the newly generated tokens as a string.
    """
    prompt = "Please extract all the text from the image, including any text in Hindi and English."
    # Qwen2-VL expects a chat-template prompt with an image placeholder;
    # passing the raw instruction text omits the required image tokens.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ],
        }
    ]
    text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(images=[image], text=[text_prompt], return_tensors="pt").to(device)
    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=500)
    # generate() returns prompt + completion; slice off the prompt tokens so
    # the echoed instruction is not included in the OCR result.
    generated_ids = outputs[:, inputs.input_ids.shape[1]:]
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
31
+
32
def search_text(extracted_text, keyword):
    """Highlight every case-insensitive occurrence of *keyword* in *extracted_text*.

    Each match is wrapped in <mark>...</mark> HTML tags for display in a
    gr.HTML component; the matched text keeps its original casing. The text
    is returned unchanged when the keyword is empty.
    """
    import re

    # An empty keyword would compile to a pattern matching at every position,
    # inserting tags between every character — treat it as "no search".
    if not keyword:
        return extracted_text

    # re.escape so the keyword is searched literally (e.g. "." is not a wildcard).
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    # re.sub does the position bookkeeping; m.group(0) preserves original casing.
    return pattern.sub(lambda m: f"<mark>{m.group(0)}</mark>", extracted_text)
46
+
47
# Gradio UI: upload an image, extract its text with Qwen2-VL, then search the
# extracted text for a keyword with <mark> highlighting.
with gr.Blocks() as demo:
    gr.Markdown("# OCR and Keyword Search Web Application Prototype")
    with gr.Row():
        image_input = gr.Image(type='pil', label="Upload an image containing text in Hindi and English")
        extract_button = gr.Button("Extract Text")
        extracted_text_output = gr.Textbox(label="Extracted Text", lines=10)
    with gr.Row():
        keyword_input = gr.Textbox(label="Enter keyword to search within the extracted text")
        search_button = gr.Button("Search")
        search_results_output = gr.HTML(label="Search Results")

    # Per-session store for the extracted text. gr.State must be wired through
    # event inputs/outputs — assigning .value from inside a handler does not
    # persist per-session state and breaks with concurrent users.
    extracted_text_state = gr.State("")

    def extract_and_display(image):
        """Run OCR and fan the result out to both the textbox and the state."""
        extracted_text = extract_text(image)
        return extracted_text, extracted_text

    def search_and_display(keyword, extracted_text):
        """Search within the extracted text received from the session state."""
        if not extracted_text:
            return "No extracted text available. Please upload an image and extract text first."
        return search_text(extracted_text, keyword)

    # Extraction updates both the visible textbox and the session state.
    extract_button.click(
        fn=extract_and_display,
        inputs=image_input,
        outputs=[extracted_text_output, extracted_text_state],
    )

    # Search reads the session state as a regular input.
    search_button.click(
        fn=search_and_display,
        inputs=[keyword_input, extracted_text_state],
        outputs=search_results_output,
    )

# Launch the Gradio app
demo.launch()