ilovetensor committed
Commit 13ebdd6 · verified · 1 parent: 9038ad8

Update app.py

Files changed (1): app.py (+43, -74)
app.py CHANGED
@@ -1,90 +1,59 @@
 import gradio as gr
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from transformers import AutoTokenizer, AutoModel
+from PIL import Image
+import os
+import re
 import torch
 
-# Check if CUDA is available and set device
+# Load the GOT model
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
-# Load the Qwen2-VL model and processor
-
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    "Qwen/Qwen2-VL-2B-Instruct",
-    trust_remote_code=True,
-    torch_dtype=torch.bfloat16
-).to(device).eval()
-
-processor = AutoProcessor.from_pretrained(
-    "Qwen/Qwen2-VL-2B-Instruct",
-    trust_remote_code=True
-)
+tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='auto', use_safetensors=True)
+model = model.eval().to(device)
 
 def extract_text(image):
-    # Prompt for OCR extraction
-    prompt = "Please extract all the text from the image, including any text in Hindi and English."
-    # Prepare inputs
-    inputs = processor(images=[image], text=prompt, return_tensors="pt").to(device)
-    # Generate outputs
-    outputs = model.generate(**inputs, max_new_tokens=500)
-    # Decode the generated text
-    generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
-    return generated_text
-
-def search_text(extracted_text, keyword):
-    import re
-    # Compile regex pattern for case-insensitive search
+    # Save the image to a temporary file
+    image_path = 'temp_image.png'
+    image.save(image_path)
+    # Use the GOT model to extract text
+    try:
+        res = model.chat(tokenizer, image_path, ocr_type='ocr')
+        return res, res  # Return the extracted text and also set it in the state variable
+    except Exception as e:
+        return f"Error: {str(e)}", ""
+    finally:
+        if os.path.exists(image_path):
+            os.remove(image_path)
+
+def keyword_search(extracted_text, keyword):
+    if not extracted_text:
+        return "No text extracted yet."
+    if not keyword:
+        return extracted_text
+    # Escape HTML special characters
+    extracted_text = extracted_text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+    # Use regular expressions to find matches, ignoring case
     pattern = re.compile(re.escape(keyword), re.IGNORECASE)
-    matches = pattern.finditer(extracted_text)
-    # Highlight matching keywords
-    highlighted_text = extracted_text
-    offset = 0
-    for match in matches:
-        start, end = match.start() + offset, match.end() + offset
-        # Insert HTML tags for highlighting
-        highlighted_text = highlighted_text[:start] + "<mark>" + highlighted_text[start:end] + "</mark>" + highlighted_text[end:]
-        offset += len("<mark></mark>")
+    highlighted_text = pattern.sub(lambda x: f"<mark>{x.group()}</mark>", extracted_text)
     return highlighted_text
 
 with gr.Blocks() as demo:
-    gr.Markdown("# OCR and Keyword Search Web Application Prototype")
-    with gr.Row():
-        image_input = gr.Image(type='pil', label="Upload an image containing text in Hindi and English")
+    gr.Markdown("# OCR and Document Search Web Application")
+
+    extracted_text_state = gr.State()
+
+    with gr.Column():
+        image_input = gr.Image(type="pil", label="Upload an image")
         extract_button = gr.Button("Extract Text")
+
         extracted_text_output = gr.Textbox(label="Extracted Text", lines=10)
-    with gr.Row():
-        keyword_input = gr.Textbox(label="Enter keyword to search within the extracted text")
-        search_button = gr.Button("Search")
+        keyword_input = gr.Textbox(label="Enter keyword to search")
+        search_button = gr.Button("Search")
         search_results_output = gr.HTML(label="Search Results")
-
-    # State to store the extracted text
-    extracted_text_state = gr.State()
-
-    # Function to extract text and display
-    def extract_and_display(image):
-        extracted_text = extract_text(image)
-        extracted_text_state.value = extracted_text
-        return extracted_text
-
-    # Function to search within the extracted text
-    def search_and_display(keyword):
-        extracted_text = extracted_text_state.value
-        if not extracted_text:
-            return "No extracted text available. Please upload an image and extract text first."
-        highlighted_text = search_text(extracted_text, keyword)
-        return highlighted_text
-
-    # Set up button click events
-    extract_button.click(
-        fn=extract_and_display,
-        inputs=image_input,
-        outputs=extracted_text_output
-    )
-
-    search_button.click(
-        fn=search_and_display,
-        inputs=keyword_input,
-        outputs=search_results_output
-    )
 
-# Launch the Gradio app
+    extract_button.click(fn=extract_text, inputs=image_input, outputs=[extracted_text_output, extracted_text_state])
+    search_button.click(fn=keyword_search, inputs=[extracted_text_state, keyword_input], outputs=search_results_output)
 
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
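
The rewritten highlighter replaces the old manual offset bookkeeping with a single pattern.sub pass, so match indices can never drift as tags are inserted. A minimal sketch of the technique in isolation; the sample text and keyword below are placeholders, not part of the commit:

import re

text = "Invoice total: 42. See the invoice footer."  # placeholder input
keyword = "invoice"                                   # placeholder keyword

# re.escape makes regex metacharacters in the keyword match literally;
# the lambda wraps each case-insensitive match in <mark> tags in one pass.
pattern = re.compile(re.escape(keyword), re.IGNORECASE)
print(pattern.sub(lambda m: f"<mark>{m.group()}</mark>", text))
# <mark>Invoice</mark> total: 42. See the <mark>invoice</mark> footer.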
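Because the commit adds an if __name__ == "__main__" guard, app.py can now be imported without launching the UI, which permits a quick smoke test of the new callbacks. A sketch under that assumption, run from the same directory as app.py; sample.png and the keyword 'ocr' are hypothetical placeholders:

from PIL import Image
from app import extract_text, keyword_search  # importing app.py loads the GOT model

image = Image.open('sample.png')  # placeholder test image

# extract_text returns (display_text, state_text), mirroring its two Gradio
# outputs: the visible Textbox and the gr.State value.
display_text, state_text = extract_text(image)
print(display_text)

# keyword_search HTML-escapes the text, then highlights case-insensitive matches.
print(keyword_search(state_text, 'ocr'))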