krishnv committed on
Commit
2c7adc2
·
verified ·
1 Parent(s): 9633d94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -30
app.py CHANGED
@@ -1,38 +1,15 @@
1
  from transformers import AutoProcessor, AutoModelForCausalLM
 
2
  from PIL import Image
3
- import gradio as gr
4
 
5
- # Load the processor and model
6
  processor = AutoProcessor.from_pretrained("microsoft/git-base-coco")
7
  model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-coco")
8
 
9
- # Define the captioning function
10
- def caption_image(image):
11
- # Process the image
12
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
13
- # Generate captions
14
- generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
15
- generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
16
- return generated_caption
17
 
18
- # Define Gradio interface components
19
- inputs = [
20
- gr.inputs.Image(type='pil', label='Upload Image')
21
- ]
22
 
23
- outputs = [
24
- gr.outputs.Textbox(label='Generated Caption')
25
- ]
26
-
27
- # Define Gradio app properties
28
- title = "Image Captioning Application"
29
- description = "Upload an image to see the caption generated by the model"
30
-
31
- # Create and launch the Gradio interface
32
- gr.Interface(
33
- fn=caption_image,
34
- inputs=inputs,
35
- outputs=outputs,
36
- title=title,
37
- description=description,
38
- ).launch(debug=True)
 
1
  from transformers import AutoProcessor, AutoModelForCausalLM
2
+ import requests
3
  from PIL import Image
 
4
 
 
5
  processor = AutoProcessor.from_pretrained("microsoft/git-base-coco")
6
  model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-coco")
7
 
8
+ url = "http://images.cocodataset.org/val2017/000000039769.jpg"
9
+ image = Image.open(requests.get(url, stream=True).raw)
 
 
 
 
 
 
10
 
11
+ pixel_values = processor(images=image, return_tensors="pt").pixel_values
 
 
 
12
 
13
+ generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
14
+ generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
15
+ print(generated_caption)