import os
import time
import gradio as gr
import torch
from PIL import Image
from gtts import gTTS, gTTSError
import numpy as np
import cv2
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
from huggingface_hub import login
# Hugging Face token (optional, read from the environment)
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
    login(token=hf_token)
# YOLOv5 object-detection model
yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
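# Optional tuning sketch (an assumption, not part of the original app):
# YOLOv5 hub models expose a confidence threshold that filters weak detections.
# yolo_model.conf = 0.5  # keep only detections with confidence >= 0.5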
# Compute a gray-level co-occurrence matrix (GLCM) and its contrast
def calculate_glcm_contrast(image):
    # PIL images are RGB, so convert RGB -> grayscale
    gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    max_value = int(gray_image.max()) + 1
    glcm = np.zeros((max_value, max_value), dtype=np.float64)
    # Count co-occurrences of gray levels at a (1, 1) diagonal offset
    for i in range(gray_image.shape[0] - 1):
        for j in range(gray_image.shape[1] - 1):
            x = gray_image[i, j]
            y = gray_image[i + 1, j + 1]
            glcm[x, y] += 1
    glcm = glcm / glcm.sum()
    # Contrast: (i - j)^2 weighted by the co-occurrence probabilities
    contrast = 0.0
    for i in range(max_value):
        for j in range(max_value):
            contrast += (i - j) ** 2 * glcm[i, j]
    return contrast
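# The nested Python loops above are slow on large images. A vectorized sketch
# (an assumption, not part of the original app) that computes the same contrast
# with np.add.at:
def calculate_glcm_contrast_vectorized(image):
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    levels = int(gray.max()) + 1
    # Pixel pairs at the same (1, 1) diagonal offset as the loop version
    x = gray[:-1, :-1].ravel()
    y = gray[1:, 1:].ravel()
    glcm = np.zeros((levels, levels), dtype=np.float64)
    np.add.at(glcm, (x, y), 1)
    glcm /= glcm.sum()
    i, j = np.indices(glcm.shape)
    return float(((i - j) ** 2 * glcm).sum())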
# Analyze texture and color temperature
def analyze_image_properties(image):
    # Color analysis (mean RGB); PIL images are already RGB, no conversion needed
    image_rgb = np.array(image)
    avg_color_per_row = np.average(image_rgb, axis=0)
    avg_color = np.average(avg_color_per_row, axis=0)
    # Classify the color temperature (labels stay in Portuguese for the TTS output)
    if avg_color[0] > avg_color[2]:  # more red than blue
        temperature = 'quente'
    else:
        temperature = 'fria'
    # Texture analysis
    texture_contrast = calculate_glcm_contrast(image)
    texture = 'lisa' if texture_contrast < 100 else 'texturizada'
    return temperature, texture
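# Quick sanity check (hypothetical values): a flat reddish image should read as
# warm and smooth, since red > blue and a constant image has zero GLCM contrast.
# >>> analyze_image_properties(Image.new("RGB", (64, 64), (200, 80, 60)))
# ('quente', 'lisa')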
# Describe the image using BLIP
def describe_image(image):
    # Note: the processor and model are reloaded on every call
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    description = processor.decode(out[0], skip_special_tokens=True)
    return description
# Translate the description from English to Portuguese
def translate_description(description):
    model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    translated = model.generate(**tokenizer(description, return_tensors="pt", padding=True))
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text
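# Both functions above reload their checkpoints from the Hub on every request.
# A minimal caching sketch (an assumption, not part of the original app): load
# each seq2seq pipeline once per process and reuse it across calls.
from functools import lru_cache

@lru_cache(maxsize=None)
def _load_seq2seq(model_name):  # hypothetical helper name
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model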
# Process the image and generate the voice output
def process_image(image):
    # Object detection
    results = yolo_model(image)
    detected_image = results.render()[0]
    # Texture and color-temperature analysis
    temperature, texture = analyze_image_properties(image)
    # Image description
    description = describe_image(image)
    translated_description = translate_description(description)
    # Build the final description (in Portuguese, to match the TTS voice)
    final_description = f"{translated_description}. A textura é {texture} e a temperatura de cor é {temperature}."
    # Text to speech, retrying when the endpoint rate-limits us
    tts = gTTS(text=final_description, lang='pt')
    attempts = 0
    while attempts < 5:
        try:
            tts.save("output.mp3")
            break
        except gTTSError as e:
            # gTTSError messages include the HTTP status; 429 means rate-limited
            if "429" in str(e):
                print("Too many requests. Waiting before retrying...")
                time.sleep(5)
                attempts += 1
            else:
                raise
    # Outputs: annotated image, description text, and audio file path
    return Image.fromarray(detected_image), final_description, "output.mp3"
# Example image for the Gradio demo
example_image_path = "example1.JPG"
# Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="pil"), gr.Textbox(), gr.Audio(type="filepath")],
    examples=[example_image_path]
)

iface.launch()