virendravaishnav committed on
Commit
1229304
·
1 Parent(s): 1082f74

Updated with OCR model and Gradio integration

Browse files
Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -21,14 +21,24 @@ def analyze_image(image):
21
  img = image.convert("RGB")
22
  text = "describe this image"
23
 
24
- # Process both image and text together
25
- inputs = processor(images=img, text=text, return_tensors="pt").to(device)
 
 
 
 
 
 
 
 
 
26
 
27
  # Generate outputs
28
  outputs = model.generate(**inputs)
29
 
30
- # Decode the output using the processor
31
- return processor.decode(outputs[0], skip_special_tokens=True)
 
32
  except Exception as e:
33
  return f"An error occurred: {str(e)}"
34
 
 
21
  img = image.convert("RGB")
22
  text = "describe this image"
23
 
24
+ # Process the image
25
+ image_inputs = processor.image_processor(images=img, return_tensors="pt").to(device)
26
+ # Process the text
27
+ text_inputs = processor.tokenizer(text, return_tensors="pt").to(device)
28
+
29
+ # Combine the inputs
30
+ inputs = {
31
+ "input_ids": text_inputs["input_ids"],
32
+ "attention_mask": text_inputs["attention_mask"],
33
+ "pixel_values": image_inputs["pixel_values"],
34
+ }
35
 
36
  # Generate outputs
37
  outputs = model.generate(**inputs)
38
 
39
+ # Decode the outputs
40
+ generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=True)
41
+ return generated_text
42
  except Exception as e:
43
  return f"An error occurred: {str(e)}"
44