SeyedAli committed on
Commit
df36445
·
1 Parent(s): f122181

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import tempfile
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
import torch
from PIL import Image

# Prefer the GPU when one is visible to torch; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Persian image-captioning checkpoint (ViT encoder + GPT decoder); the same
# repo provides the model weights, the image processor, and the tokenizer.
_CHECKPOINT = "SeyedAli/Persian-Image-Captioning-VIT-GPT"
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
feature_extractor = ViTImageProcessor.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

model = model.to(device)

# Beam-search decoding settings used by every generate() call.
max_length = 32
num_beams = 4
gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
18
def predict_step(image_paths):
    """Generate a Persian caption for the first image in *image_paths*.

    Args:
        image_paths: iterable of filesystem paths to image files.

    Returns:
        str: the beam-search caption decoded for the first image.
    """
    images = []
    for image_path in image_paths:
        i_image = Image.open(image_path)
        # The ViT image processor expects 3-channel RGB input.
        if i_image.mode != "RGB":
            i_image = i_image.convert(mode="RGB")
        images.append(i_image)

    pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
    # Inputs must live on the same device as the model.
    pixel_values = pixel_values.to(device)

    output_ids = model.generate(pixel_values, **gen_kwargs)

    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    preds = [pred.strip() for pred in preds]
    # BUG FIX: the original returned run_transaltion_model(preds[0])[0], but
    # no such function is defined anywhere in this file, so every call raised
    # NameError. The checkpoint already produces Persian captions, so the
    # decoded caption is returned directly.
    return preds[0]
35
+
36
def ImageCaptioning(image):
    """Gradio handler: caption a numpy image array and return the caption text."""
    # predict_step() consumes file paths, so round-trip the uploaded array
    # through a temporary PNG on disk; the file is removed when the context
    # manager exits.
    with tempfile.NamedTemporaryFile(suffix=".png") as tmp:
        Image.fromarray(image).save(tmp.name)
        return predict_step([tmp.name])
43
+
44
# Minimal image-in / text-out web UI around the captioning handler.
iface = gr.Interface(
    fn=ImageCaptioning,
    inputs="image",
    outputs="text",
)
iface.launch(share=False)