Spaces:

virendravaishnav
/

po-fetch-detail

Running

virendravaishnav committed on Sep 11, 2024

Commit

ee363c7

1 Parent(s): 53edb88

Updated with OCR model and Gradio integration

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,20 +1,24 @@
 import gradio as gr
-from transformers import pipeline
-# Load a Hugging Face OCR model for printed text
-ocr_model = pipeline('image-to-text', model='microsoft/trocr-base-printed')
-def analyze_image(image):
-    result = ocr_model(image)
-    return result[0]['generated_text'] if result else "No text could be extracted."
-# Gradio interface for image input
 demo = gr.Interface(
-    fn=analyze_image,
-    inputs=gr.Image(type="pil"),  # Upload an image
-    outputs="text",  # Output the extracted text
-    title="Invoice Text Extraction",
-    description="Upload an image of an invoice to extract text."
 )
 if __name__ == "__main__":

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForVision2Seq
+# Load the tokenizer and model
+model_name = "OpenGVLab/InternVL2-1B"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForVision2Seq.from_pretrained(model_name)
+def analyze_image_text(image, text):
+    # Tokenize the input
+    inputs = tokenizer(text, return_tensors="pt")
+    # Use the model to get outputs
+    outputs = model.generate(**inputs)
+    return tokenizer.decode(outputs[0])
+# Gradio interface
 demo = gr.Interface(
+    fn=analyze_image_text,
+    inputs=[gr.Image(type="pil"), gr.Textbox()],
+    outputs="text",
+    title="InternVL2-1B Image-Text Model"
 )
 if __name__ == "__main__":