Spaces:
Sleeping
Commit
·
1229304
1
Parent(s):
1082f74
Updated with OCR model and Gradio integration
Browse files
app.py
CHANGED
@@ -21,14 +21,24 @@ def analyze_image(image):
|
|
21 |
img = image.convert("RGB")
|
22 |
text = "describe this image"
|
23 |
|
24 |
-
# Process
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# Generate outputs
|
28 |
outputs = model.generate(**inputs)
|
29 |
|
30 |
-
# Decode the
|
31 |
-
|
|
|
32 |
except Exception as e:
|
33 |
return f"An error occurred: {str(e)}"
|
34 |
|
|
|
21 |
img = image.convert("RGB")
|
22 |
text = "describe this image"
|
23 |
|
24 |
+
# Process the image
|
25 |
+
image_inputs = processor.image_processor(images=img, return_tensors="pt").to(device)
|
26 |
+
# Process the text
|
27 |
+
text_inputs = processor.tokenizer(text, return_tensors="pt").to(device)
|
28 |
+
|
29 |
+
# Combine the inputs
|
30 |
+
inputs = {
|
31 |
+
"input_ids": text_inputs["input_ids"],
|
32 |
+
"attention_mask": text_inputs["attention_mask"],
|
33 |
+
"pixel_values": image_inputs["pixel_values"],
|
34 |
+
}
|
35 |
|
36 |
# Generate outputs
|
37 |
outputs = model.generate(**inputs)
|
38 |
|
39 |
+
# Decode the outputs
|
40 |
+
generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
41 |
+
return generated_text
|
42 |
except Exception as e:
|
43 |
return f"An error occurred: {str(e)}"
|
44 |
|