Commit 0a40c8a · 1 Parent(s): 8dbeec6
Uploading files 377337
app.py CHANGED
@@ -9,16 +9,16 @@ import cv2
 from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
 from huggingface_hub import login

-#token
+# my token
 hf_token = os.getenv("HUGGINGFACE_TOKEN")

 if hf_token:
     login(token=hf_token)

-#
+# YOLOv5
 model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

-#
+# Compute the GLCM and its contrast
 def calculate_glcm_contrast(image):
     gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
     max_value = gray_image.max() + 1
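Note on this hunk: the body of calculate_glcm_contrast (lines 25-38) falls outside the context shown. For orientation, here is a minimal NumPy sketch of a gray-level co-occurrence matrix and its contrast, consistent with the two lines that are visible; the pairing offset and normalization are assumptions, not the commit's actual code:

    import numpy as np
    import cv2

    def glcm_contrast_sketch(image):
        # Grayscale conversion, mirroring the visible lines of the hunk
        gray = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
        levels = int(gray.max()) + 1

        # Co-occurrence counts for horizontally adjacent pixel pairs
        glcm = np.zeros((levels, levels), dtype=np.float64)
        np.add.at(glcm, (gray[:, :-1].ravel(), gray[:, 1:].ravel()), 1)
        glcm /= glcm.sum()  # normalize counts to joint probabilities

        # Contrast: sum of (i - j)^2 * P(i, j) over all gray-level pairs
        i, j = np.indices(glcm.shape)
        return float(np.sum((i - j) ** 2 * glcm))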
@@ -39,21 +39,26 @@ def calculate_glcm_contrast(image):

     return contrast

-#Analyze texture and color temperature
+# Analyze texture and color temperature
 def analyze_image_properties(image):
-    #color (RGB mean)
+    # Color analysis (RGB mean)
     image_rgb = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
     avg_color_per_row = np.average(image_rgb, axis=0)
     avg_color = np.average(avg_color_per_row, axis=0)
-
+
+    # Determine the color temperature
+    if avg_color[0] > avg_color[2]:  # More red than blue
+        temperature = 'quente'
+    else:
+        temperature = 'fria'

-    #texture
+    # Texture analysis
     texture_contrast = calculate_glcm_contrast(image)
     texture = 'lisa' if texture_contrast < 100 else 'texturizada'

     return temperature, texture

-#Describe image
+# Describe the image using BLIP
 def describe_image(image):
     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
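One thing worth flagging in the new temperature rule: np.array() on a PIL image already yields RGB, so the cv2.COLOR_BGR2RGB conversion above swaps the red and blue channels for the PIL inputs this app receives from Gradio. A hypothetical smoke test (assuming app.py's functions are importable, which also triggers the YOLOv5 load) makes the effect visible:

    import numpy as np
    from PIL import Image
    from app import analyze_image_properties  # hypothetical import of this file

    red = Image.fromarray(np.full((64, 64, 3), (255, 0, 0), dtype=np.uint8))
    blue = Image.fromarray(np.full((64, 64, 3), (0, 0, 255), dtype=np.uint8))

    # Because of the BGR2RGB swap on RGB input, pure red reads as 'fria' and
    # pure blue as 'quente'; uniform images should score near-zero contrast,
    # hence 'lisa' (assuming a standard GLCM contrast in the elided body).
    print(analyze_image_properties(red))   # expected: ('fria', 'lisa')
    print(analyze_image_properties(blue))  # expected: ('quente', 'lisa')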
@@ -62,7 +67,7 @@ def describe_image(image):
     description = processor.decode(out[0], skip_special_tokens=True)
     return description

-#
+# Translate the description to pt
 def translate_description(description):
     model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
     tokenizer = AutoTokenizer.from_pretrained(model_name)
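The caption-generation step between the model loading and the processor.decode call (new lines 65-66) is elided by this hunk. Based on the Salesforce/blip-image-captioning-base model card, the missing lines presumably resemble this sketch:

    # Presumed elided lines: preprocess the PIL image, then generate a caption
    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)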
@@ -71,15 +76,12 @@ def translate_description(description):
     translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
     return translated_text

-#
+# Process the image and generate voice output
 def process_image(image):
-    #
+    # Object detection
     results = model(image)
     detected_image = results.render()[0]

-    # Color analysis (RGB mean)
-    mean_rgb = np.mean(np.array(image), axis=(0, 1))
-
     # Texture and color temperature analysis
     temperature, texture = analyze_image_properties(image)

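Likewise, this hunk skips the lines between the tokenizer setup and tokenizer.decode (new lines 74-75). With a Marian checkpoint such as Helsinki-NLP/opus-mt-tc-big-en-pt, they presumably look like the following; the variable name translation_model is hypothetical:

    # Presumed elided lines: load the seq2seq model and translate the caption
    translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    translated = translation_model.generate(**tokenizer(description, return_tensors="pt"))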
@@ -87,10 +89,10 @@ def process_image(image):
     description = describe_image(image)
     translated_description = translate_description(description)

-    #
+    # Build the final description
     final_description = f"{translated_description}. A textura é {texture} e a temperatura de cor é {temperature}."

-    #
+    # Text to speech
     tts = gTTS(text=final_description, lang='pt')
     attempts = 0
     while attempts < 5:
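A caveat on the retry loop that starts here and continues in the next hunk: gTTS.tts.gTTSError looks up an attribute on the gTTS class rather than on the gtts module (the exception actually lives at gtts.tts.gTTSError), and e.r is not a documented attribute, so the except clause would itself fail at runtime. Below is a corrected sketch of the save-and-retry logic, assuming the elided lines call tts.save; backing off on any gTTSError instead of checking for HTTP 429 is a simplification:

    import time
    from gtts import gTTS
    from gtts.tts import gTTSError

    def speak(text, attempts=5, path="output.mp3"):
        # Retry text-to-speech a few times before giving up
        for _ in range(attempts):
            try:
                gTTS(text=text, lang='pt').save(path)
                return path
            except gTTSError:
                # The attribute carrying the HTTP response varies across gtts
                # versions, so this sketch retries on any gTTSError.
                print("gTTS request failed. Waiting before retrying...")
                time.sleep(5)
        raise RuntimeError("text-to-speech failed after retries")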
@@ -99,19 +101,19 @@ def process_image(image):
             break
         except gTTS.tts.gTTSError as e:
             if e.r.status_code == 429:
-                print("
+                print("Too many requests. Waiting before retrying...")
                 time.sleep(5)
                 attempts += 1
             else:
                 raise e

-    #Output
+    # Output
     return Image.fromarray(detected_image), final_description, "output.mp3"

-#
+#
 example_image_path = "example1.JPG"

-#
+# Gradio
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="pil"),
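The diff ends mid-call inside gr.Interface. Since process_image returns a PIL image, a string, and the path "output.mp3", the remaining arguments plausibly pair one output component per return value; the outputs list, labels, and launch call below are assumptions, not the commit's code:

    iface = gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil"),
        # Assumed completion: one output component per value process_image returns
        outputs=[
            gr.Image(type="pil", label="Detections"),
            gr.Textbox(label="Description"),
            gr.Audio(label="Audio"),
        ],
        examples=[[example_image_path]],
    )

    iface.launch()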