Spaces:
Sleeping
Sleeping
File size: 2,861 Bytes
183ba69 471f43d 817e7fd 389a29c 4273fa3 817e7fd 471f43d 817e7fd 471f43d 247c724 9d4c268 471f43d 817e7fd 471f43d 183ba69 247c724 817e7fd 247c724 389a29c 817e7fd 247c724 817e7fd 389a29c 817e7fd 247c724 389a29c 183ba69 817e7fd 247c724 817e7fd 183ba69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
import time
# Load the BLIP image-captioning processor and model once, at import time.
# Both are built from the same checkpoint name, so it is defined a single
# time to keep the two from drifting apart when the checkpoint is changed.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model_blip = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# Load a text-generation pipeline (GPT-2 in this example); it is used to
# continue the nutrition prompt built from the image caption.
generator = pipeline("text-generation", model="gpt2")
def caption(img, min_len, max_len):
    """Generate a text caption for an image with the BLIP model.

    Args:
        img: Anything ``PIL.Image.open`` accepts; the Gradio interface in
            this file passes a file path.
        min_len: Lower bound forwarded to ``generate`` as ``min_length``.
        max_len: Upper bound forwarded to ``generate`` as ``max_length``.

    Returns:
        The decoded caption string with special tokens removed.
    """
    # Open the file in a context manager so its handle is released as soon
    # as ``convert`` has produced an independent RGB copy; the previous code
    # never closed the opened image.
    with Image.open(img) as source:
        raw_image = source.convert("RGB")

    inputs = processor(raw_image, return_tensors="pt")

    # UI sliders may hand back float values; length limits are coerced to
    # integers before being passed to the generator.
    out = model_blip.generate(
        **inputs,
        min_length=int(min_len),
        max_length=int(max_len),
    )
    return processor.decode(out[0], skip_special_tokens=True)
def generate_nutritional_info(food_description, language):
    """Ask the text-generation pipeline for nutrition facts about a food.

    Args:
        food_description: Text describing the food (the image caption).
        language: ``"Português"`` selects the Portuguese prompt; any other
            value selects the English prompt.

    Returns:
        The ``generated_text`` of the first (only) returned sequence. With
        the pipeline's default settings this text starts with the prompt.
    """
    if language == "Português":
        prompt = f"Descreva as informações nutricionais e as calorias do seguinte alimento: {food_description}."
    else:
        prompt = f"Provide detailed nutritional information and calories for the following food: {food_description}."

    # ``max_length`` counts the prompt tokens too, so a long description
    # could leave no room for any generated text (or be rejected outright).
    # ``max_new_tokens`` budgets only the continuation.
    result = generator(prompt, max_new_tokens=150, num_return_sequences=1)
    return result[0]["generated_text"]
def greet(img, min_len, max_len, language):
    """Caption a food image, generate nutrition text, and format the report.

    Args:
        img: Path of the uploaded image, or ``None`` when nothing was
            uploaded.
        min_len: Minimum caption length passed to ``caption``.
        max_len: Maximum caption length passed to ``caption``.
        language: ``"Português"`` for a Portuguese report; any other value
            produces the English report.

    Returns:
        A multi-section string with the food description, the generated
        nutritional information, and the elapsed time in seconds.

    Raises:
        gr.Error: If no image was provided.
    """
    portuguese = language == "Português"

    # Without an image the captioning step would fail deep inside PIL with
    # an unhelpful message; surface a clear error in the UI instead.
    if img is None:
        if portuguese:
            raise gr.Error("Envie uma imagem antes de continuar.")
        raise gr.Error("Please upload an image first.")

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()

    # Step 1: caption the image.
    food_description = caption(img, min_len, max_len)

    # Step 2: generate nutritional information and calories from the caption.
    nutritional_info = generate_nutritional_info(food_description, language)

    # Two decimals are plenty for a human-readable duration.
    total_time = f"{time.perf_counter() - start:.2f}"

    # Combine the results in the requested language.
    if portuguese:
        result = f"Descrição do Alimento: {food_description}\n\nInformações Nutricionais:\n{nutritional_info}\n\nGerado em {total_time} segundos."
    else:
        result = f"Food Description: {food_description}\n\nNutritional Information:\n{nutritional_info}\n\nGenerated in {total_time} seconds."
    return result
# Gradio interface: wires the four input widgets to ``greet`` in the same
# order as its parameters (image, min length, max length, language).
iface = gr.Interface(
    fn=greet,
    title='Nutritionist Agent with BLIP and GPT-2',
    description="Upload an image of food, and the agent will describe it and provide nutritional information.",
    inputs=[
        gr.Image(type='filepath', label='Image'),
        gr.Slider(label='Minimum Length', minimum=1, maximum=1000, value=30),
        gr.Slider(label='Maximum Length', minimum=1, maximum=1000, value=100),
        # Language selector; the chosen label is passed to ``greet`` as-is.
        gr.Radio(choices=["Português", "English"], label="Language", value="Português"),
    ],
    outputs=gr.Textbox(label='Result'),
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
)

# Start the web app (the stray trailing "|" after this call was a syntax
# error and has been removed).
iface.launch()