Spaces:

virendravaishnav
/

po-fetch-detail

Running

virendravaishnav committed on Sep 13, 2024

Commit

1229304

1 Parent(s): 1082f74

Updated with OCR model and Gradio integration

Files changed (1) hide show

app.py CHANGED Viewed

@@ -21,14 +21,24 @@ def analyze_image(image):
         img = image.convert("RGB")
         text = "describe this image"
-        # Process both image and text together
-        inputs = processor(images=img, text=text, return_tensors="pt").to(device)
         # Generate outputs
         outputs = model.generate(**inputs)
-        # Decode the output using the processor
-        return processor.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
         return f"An error occurred: {str(e)}"

         img = image.convert("RGB")
         text = "describe this image"
+        # Process the image
+        image_inputs = processor.image_processor(images=img, return_tensors="pt").to(device)
+        # Process the text
+        text_inputs = processor.tokenizer(text, return_tensors="pt").to(device)
+        # Combine the inputs
+        inputs = {
+            "input_ids": text_inputs["input_ids"],
+            "attention_mask": text_inputs["attention_mask"],
+            "pixel_values": image_inputs["pixel_values"],
+        }
         # Generate outputs
         outputs = model.generate(**inputs)
+        # Decode the outputs
+        generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return generated_text
     except Exception as e:
         return f"An error occurred: {str(e)}"