Spaces: Sleeping
Commit 657edd9 · 1 Parent(s): 187f902
Updated with OCR model and Gradio integration
Browse files
app.py CHANGED
@@ -20,7 +20,24 @@ model.to(device)
 def analyze_image(image):
     try:
         img = image.convert("RGB")
-
+
+        # Process the image
+        image_inputs = processor(images=img, return_tensors="pt")
+        # Process the text
+        text_inputs = tokenizer("describe this image", return_tensors="pt")
+
+        # Move inputs to the appropriate device
+        image_inputs = {k: v.to(device) for k, v in image_inputs.items()}
+        text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
+
+        # Combine the inputs
+        inputs = {
+            "input_ids": text_inputs["input_ids"],
+            "attention_mask": text_inputs["attention_mask"],
+            "pixel_values": image_inputs["pixel_values"],
+        }
+
+        # Generate outputs
         outputs = model.generate(**inputs)
         return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
@@ -35,4 +52,4 @@ demo = gr.Interface(
 )
 
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch()