# --- Residue from the Hugging Face file viewer (not part of the program) ---
# rmayormartins's picture
# Subindo arquivos331313
# 8dbeec6
# raw
# history blame
# 3.74 kB
import functools
import os
import time

import cv2
import gradio as gr
import numpy as np
import torch
from gtts import gTTS, gTTSError
from huggingface_hub import login
from PIL import Image
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    BlipForConditionalGeneration,
    BlipProcessor,
)
# Hugging Face token: authenticate only when one is supplied via the environment.
hf_token = os.environ.get("HUGGINGFACE_TOKEN")
if hf_token:
    login(token=hf_token)
# YOLOv5 model (small variant), fetched through torch.hub at import time.
model = torch.hub.load(repo_or_dir='ultralytics/yolov5', model='yolov5s')
# Compute the GLCM contrast of an image
def calculate_glcm_contrast(image):
    """Return the GLCM contrast of *image* for the diagonal (+1, +1) offset.

    The contrast of a normalised grey-level co-occurrence matrix is
    ``sum((i - j) ** 2 * P[i, j])``. Because ``P[i, j]`` is just the fraction
    of pixel pairs with levels ``(i, j)``, that sum equals the mean squared
    difference between every pixel and its lower-right neighbour, which is
    computed here directly with array operations instead of building the
    matrix in Python loops.

    Args:
        image: A PIL image or array. Multi-channel input is treated as RGB
            (the channel order PIL produces); 2-D input is used as-is.

    Returns:
        The contrast as a float, or ``0.0`` when the image is too small to
        contain any diagonal pixel pair.
    """
    pixels = np.asarray(image)
    if pixels.ndim == 2:
        # Already single-channel: nothing to convert.
        gray_image = pixels
    else:
        # The array comes from PIL, so channels are RGB, not OpenCV's BGR.
        gray_image = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    # Widen before subtracting; uint8 arithmetic would wrap around.
    reference = gray_image[:-1, :-1].astype(np.int64)
    neighbour = gray_image[1:, 1:].astype(np.int64)
    if reference.size == 0:
        # Fewer than two rows or columns: no pairs, avoid a 0/0 NaN.
        return 0.0
    return float(np.mean((reference - neighbour) ** 2))
# Analyze texture and color temperature
def analyze_image_properties(image):
    """Classify *image* by overall brightness and by texture contrast.

    Returns:
        A ``(temperature, texture)`` tuple of strings: ``temperature`` is
        ``'fria'`` when the mean over all channels is below 128 and
        ``'quente'`` otherwise; ``texture`` is ``'lisa'`` when the GLCM
        contrast is below 100 and ``'texturizada'`` otherwise.
    """
    # Color: average over rows, then over columns, leaving one value per channel.
    # NOTE: the channel swap cannot change the outcome, because the decision
    # below averages across all channels anyway.
    swapped = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    per_channel_mean = np.average(np.average(swapped, axis=0), axis=0)
    if np.mean(per_channel_mean) < 128:
        temperature = 'fria'
    else:
        temperature = 'quente'

    # Texture
    if calculate_glcm_contrast(image) < 100:
        texture = 'lisa'
    else:
        texture = 'texturizada'

    return temperature, texture
# Describe the image with BLIP
@functools.lru_cache(maxsize=1)
def _load_blip(checkpoint="Salesforce/blip-image-captioning-base"):
    """Load the BLIP processor/model pair for *checkpoint* once and cache it."""
    processor = BlipProcessor.from_pretrained(checkpoint)
    captioner = BlipForConditionalGeneration.from_pretrained(checkpoint)
    return processor, captioner


def describe_image(image):
    """Return a caption for *image* generated by BLIP.

    The processor and model are loaded on the first call only; reloading
    them for every request repeats the full checkpoint load each time.

    Args:
        image: The image to caption, passed straight to the BLIP processor.

    Returns:
        The decoded caption string with special tokens removed.
    """
    processor, captioner = _load_blip()
    inputs = processor(image, return_tensors="pt")
    out = captioner.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)
# Translate to Portuguese
@functools.lru_cache(maxsize=1)
def _load_translator(model_name):
    """Load the tokenizer/model pair for *model_name* once and cache it."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    translator = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, translator


def translate_description(description):
    """Translate an English *description* into Portuguese.

    The tokenizer and model are loaded on the first call only instead of on
    every request.

    Args:
        description: English text to translate.

    Returns:
        The translated text with special tokens removed.
    """
    tokenizer, translator = _load_translator('Helsinki-NLP/opus-mt-tc-big-en-pt')
    # This checkpoint serves more than one Portuguese target, so the input
    # needs a sentence-initial target-language token (see the model card).
    if not description.startswith(">>"):
        description = f">>por<< {description}"
    encoded = tokenizer(description, return_tensors="pt", padding=True)
    translated = translator.generate(**encoded)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
# Full pipeline
def process_image(image):
    """Run detection, captioning, translation and text-to-speech on *image*.

    Args:
        image: The input image (the interface supplies a PIL image).

    Returns:
        A tuple ``(annotated_image, final_description, audio_path)``: the
        rendered detection result as a PIL image, the Portuguese description
        text, and the path of the MP3 file holding the spoken description.

    Raises:
        gTTSError: If speech synthesis fails for a reason other than rate
            limiting, or is still rate limited on the last attempt.
    """
    # Detection
    results = model(image)
    detected_image = results.render()[0]

    # Texture and color temperature
    temperature, texture = analyze_image_properties(image)

    # Caption, then translation
    description = describe_image(image)
    translated_description = translate_description(description)

    # Final text
    final_description = f"{translated_description}. A textura é {texture} e a temperatura de cor é {temperature}."

    # Text to speech, retrying while the service answers HTTP 429
    audio_path = "output.mp3"
    tts = gTTS(text=final_description, lang='pt')
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            tts.save(audio_path)
            break
        except gTTSError as e:
            # The HTTP response lives on ``rsp`` and may be None.
            status_code = getattr(e.rsp, "status_code", None)
            if status_code != 429 or attempt == max_attempts:
                # Never fall through and return a missing or stale file.
                raise
            print("Muitas requisicoes...")
            time.sleep(5)

    # Output
    return Image.fromarray(detected_image), final_description, audio_path
# Example input listed in the interface.
example_image_path = "example1.JPG"

# Gradio UI: one image in; annotated image, description text and audio file out.
image_input = gr.Image(type="pil")
result_outputs = [
    gr.Image(type="pil"),
    gr.Textbox(),
    gr.Audio(type="filepath"),
]
iface = gr.Interface(
    fn=process_image,
    inputs=image_input,
    outputs=result_outputs,
    examples=[example_image_path],
)
iface.launch()