SumanthKarnati committed on
Commit
bc81463
·
1 Parent(s): 89d253c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -1
app.py CHANGED
@@ -1,3 +1,74 @@
1
  import gradio as gr
 
 
 
 
 
 
 
2
 
3
- gr.Interface.load("models/SumanthKarnati/Image2Ingredients").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os
3
+ import nltk
4
+ from transformers import VisionEncoderDecoderModel, AutoTokenizer, ViTImageProcessor, pipeline
5
+ import torch
6
+ from PIL import Image
7
+ from nltk.corpus import stopwords
8
+ from io import BytesIO
9
 
10
# Fetch the NLTK stop-word corpus used when cleaning up predicted ingredients.
nltk.download("stopwords")

# Image-to-ingredients captioning model, put into evaluation mode.
model = VisionEncoderDecoderModel.from_pretrained(
    "SumanthKarnati/Image2Ingredients"
)
model.eval()

# The image preprocessor and the tokenizer are both loaded from the
# nlpconnect/vit-gpt2-image-captioning checkpoint.
feature_extractor = ViTImageProcessor.from_pretrained(
    "nlpconnect/vit-gpt2-image-captioning"
)
tokenizer = AutoTokenizer.from_pretrained(
    "nlpconnect/vit-gpt2-image-captioning"
)

# Text-generation pipeline that writes the nutritional advice.
generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")

# Run the captioning model on the GPU when one is available, else on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Generation settings forwarded to ``model.generate`` for captioning.
max_length = 16
num_beams = 4
gen_kwargs = dict(max_length=max_length, num_beams=num_beams)

# English stop words, kept as a set for fast membership tests.
stop_words = set(stopwords.words("english"))
30
+
31
def remove_stop_words(word_list, stop_word_set=None):
    """Return the entries of ``word_list`` that are not stop words.

    The comparison ignores letter case and surrounding whitespace, so
    entries such as ``"The"`` or ``" and "`` are recognised against a
    lowercase stop-word list. Surviving entries are returned unchanged and
    in their original order.

    Args:
        word_list: Iterable of strings to filter.
        stop_word_set: Optional collection of lowercase stop words. When
            omitted, the module-level ``stop_words`` set is used.

    Returns:
        A new list containing only the non-stop-word entries.
    """
    if stop_word_set is None:
        # Resolved at call time so the module-level set is only required
        # when the caller does not supply its own.
        stop_word_set = stop_words
    return [
        word
        for word in word_list
        if word.strip().lower() not in stop_word_set
    ]
33
+
34
def _load_rgb_image(source):
    """Turn one supported image input into an RGB PIL image.

    Accepts an object that already exposes ``convert`` (e.g. a PIL image),
    an array-like with ``shape`` and ``dtype`` (e.g. a numpy array), an
    object with a ``name`` attribute holding a path (e.g. an uploaded temp
    file), or anything ``Image.open`` accepts directly (path or file object).
    """
    if hasattr(source, "convert"):
        return source.convert("RGB")
    if hasattr(source, "shape") and hasattr(source, "dtype"):
        return Image.fromarray(source).convert("RGB")
    # Upload wrappers carry the on-disk path in ``.name``; plain paths and
    # file objects without that attribute are passed through as they are.
    location = getattr(source, "name", source)
    with Image.open(location) as opened:
        # ``convert`` loads the pixel data and returns a new image, so the
        # underlying file can be closed when the ``with`` block exits.
        return opened.convert("RGB")


def predict_step(image_files, model, feature_extractor, tokenizer, device, gen_kwargs):
    """Generate one decoded caption per supplied image.

    Args:
        image_files: Iterable of image inputs; ``None`` entries are skipped.
            Each remaining entry may be a PIL image, an array, a path, or a
            file-like object (see ``_load_rgb_image``).
        model: Object whose ``generate(pixel_values, **gen_kwargs)`` returns
            output token ids.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=..., return_tensors="pt")`` whose
            result provides ``"pixel_values"``.
        tokenizer: Object providing ``batch_decode``.
        device: Device the pixel values are moved to before generation.
        gen_kwargs: Keyword arguments forwarded to ``model.generate``.

    Returns:
        A list of whitespace-stripped decoded strings, one per usable image,
        or ``None`` when no usable image was supplied.
    """
    images = [
        _load_rgb_image(image_file)
        for image_file in image_files
        if image_file is not None
    ]

    if not images:
        return None

    inputs = feature_extractor(images=images, return_tensors="pt")
    # Move the tensor explicitly instead of relying on ``inputs.to(device)``
    # mutating the batch in place.
    pixel_values = inputs["pixel_values"].to(device)
    output_ids = model.generate(pixel_values, **gen_kwargs)

    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return [pred.strip() for pred in preds]
53
+
54
def _parse_ingredients(caption):
    """Split a hyphen-separated caption into a cleaned ingredient list.

    Entries are stripped of surrounding whitespace; empty entries and entries
    containing a digit are dropped; duplicates are removed while keeping
    first-seen order; finally stop words are filtered out.
    """
    stripped = (piece.strip() for piece in caption.split('-'))
    kept = [
        piece
        for piece in stripped
        if piece and not any(ch.isdigit() for ch in piece)
    ]
    # dict.fromkeys de-duplicates while preserving insertion order.
    return remove_stop_words(list(dict.fromkeys(kept)))


def process_image(image):
    """Predict ingredients for ``image`` and generate nutritional advice.

    Args:
        image: The image supplied by the UI, or ``None`` when nothing was
            submitted.

    Returns:
        A ``(ingredients, suggestions)`` tuple: the list of ingredient
        strings and the generated advice text. When no image is supplied or
        no ingredient survives filtering, the list is empty and the text is
        a short explanatory message instead of generated advice.
    """
    if image is None:
        return [], "No image was provided."

    captions = predict_step([image], model, feature_extractor, tokenizer, device, gen_kwargs)
    if not captions:
        # predict_step returns None when it had no usable image.
        return [], "No image was provided."

    preds = _parse_ingredients(captions[0])
    if not preds:
        # Skip the language-model call: a prompt with an empty ingredient
        # list gives it nothing to analyse.
        return [], "No ingredients could be identified in the image."

    preds_str = ', '.join(preds)

    prompt = f"You are a knowledgeable assistant that provides nutritional advice based on a list of ingredients. The identified ingredients are: {preds_str}. Note that some ingredients may not make sense, so use the ones that do. Can you provide a nutritional analysis and suggestions for improvement?"

    # The default ``max_length`` budget counts prompt tokens and is shorter
    # than this prompt, so bound the continuation with ``max_new_tokens``.
    suggestions = generator(prompt, do_sample=True, min_length=200, max_new_tokens=256)

    # The pipeline returns prompt + continuation; keep only the continuation.
    suggestions = suggestions[0]['generated_text'][len(prompt):]

    return preds, suggestions
72
+
73
# Web UI: one image input feeding process_image, and two text outputs for
# the ingredient list and the generated suggestions.
iface = gr.Interface(
    fn=process_image,
    inputs="image",
    outputs=["text", "text"],
)
iface.launch()