lotrlol Amrrs committed on
Commit
a7898c7
·
0 Parent(s):

Duplicate from Amrrs/image-caption-with-vit-gpt2

Browse files

Co-authored-by: amrrs <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Image Caption With Vit Gpt2
3
+ emoji: 👀
4
+ colorFrom: pink
5
+ colorTo: pink
6
+ sdk: gradio
7
+ app_file: app.py
8
+ pinned: false
9
+ license: mit
10
+ duplicated_from: Amrrs/image-caption-with-vit-gpt2
11
+ ---
12
+
13
+ # Configuration
14
+
15
+ `title`: _string_
16
+ Display title for the Space
17
+
18
+ `emoji`: _string_
19
+ Space emoji (emoji-only character allowed)
20
+
21
+ `colorFrom`: _string_
22
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
23
+
24
+ `colorTo`: _string_
25
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
26
+
27
+ `sdk`: _string_
28
+ Can be either `gradio`, `streamlit`, or `static`
29
+
30
+ `sdk_version` : _string_
31
+ Only applicable for `streamlit` SDK.
32
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
33
+
34
+ `app_file`: _string_
35
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code, or `static` html code).
36
+ Path is relative to the root of the repository.
37
+
38
+ `models`: _List[string]_
39
+ HF model IDs (like "gpt2" or "deepset/roberta-base-squad2") used in the Space.
40
+ Will be parsed automatically from your code if not specified here.
41
+
42
+ `datasets`: _List[string]_
43
+ HF dataset IDs (like "common_voice" or "oscar-corpus/OSCAR-2109") used in the Space.
44
+ Will be parsed automatically from your code if not specified here.
45
+
46
+ `pinned`: _boolean_
47
+ Whether the Space stays on top of your list.
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -*- coding: utf-8 -*-
"""Image Captioning with ViT+GPT2.

Generates a natural-language caption for an input image using a
VisionEncoderDecoderModel (ViT image encoder + distilGPT2 text decoder).

Original file is located at
https://colab.research.google.com/drive/1P3O0gO5AUqSmM8rE9dxy2tXJ-9jkhxHz
"""

# NOTE(review): PIL.Image and requests were only used by Colab scratch code
# that has been removed; imports are kept to avoid breaking anything not
# visible here.
from PIL import Image
from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast
import requests

# Pretrained encoder-decoder captioning model (ViT encoder + distilGPT2 decoder).
model = VisionEncoderDecoderModel.from_pretrained("sachin/vit2distilgpt2")

# Feature extractor that converts a PIL image into the pixel tensor the ViT
# encoder expects (resize + normalize).
vit_feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# Tokenizer used to decode generated token ids back into text.
tokenizer = PreTrainedTokenizerFast.from_pretrained("distilgpt2")

# inference function
def vit2distilgpt2(img):
    """Generate a one-sentence caption for *img*.

    Args:
        img: A PIL image (as supplied by the Gradio image input).

    Returns:
        str: The generated caption, truncated at the first period as naive
        post-processing for a cleaner single-sentence output.
    """
    # Preprocess the image into the pixel tensor the ViT encoder expects.
    pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
    # Beam search (5 beams) on CPU; returns generated token ids.
    # (The original assigned the result to both `encoder_outputs` and an
    # unused `generated_ids` alias; the dead alias is removed here.)
    generated_ids = model.generate(pixel_values.to('cpu'), num_beams=5)
    # Decode token ids to text, dropping special tokens (BOS/EOS/padding).
    generated_sentences = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

    return generated_sentences[0].split('.')[0]
#!wget https://media.glamour.com/photos/5f171c4fd35176eaedb36823/master/w_2560%2Cc_limit/bike.jpg

import gradio as gr

# UI wiring: a single PIL image in, a single text caption out.
image_input = gr.inputs.Image(type="pil", label="Original Image")
caption_output = gr.outputs.Textbox(label='Caption')

demo = gr.Interface(
    vit2distilgpt2,
    [image_input],
    [caption_output],
    title="Image Captioning using ViT + GPT2",
    description="ViT and GPT2 are used to generate Image Caption for the uploaded image. COCO Dataset was used for training. This image captioning model might have some biases that we couldn't figure during our stress testing, so if you find any bias (gender, race and so on) please use `Flag` button to flag the image with bias",
    article=" <a href='https://huggingface.co/sachin/vit2distilgpt2'>Model Repo on Hugging Face Model Hub</a>",
    examples=[
        ["people-walking-street-pedestrian-crossing-traffic-light-city.jpeg"],
        ["elonmusk.jpeg"],
    ],
    theme="huggingface",
)

# debug=True streams logs; enable_queue=True queues concurrent requests.
demo.launch(debug=True, enable_queue=True)
elonmusk.jpeg ADDED
people-walking-street-pedestrian-crossing-traffic-light-city.jpeg ADDED
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ transformers
3
+ Pillow