rmayormartins committed on
Commit
0a40c8a
·
1 Parent(s): 8dbeec6

Uploading files377337

Files changed (1)
  1. app.py +22 -20
app.py CHANGED
@@ -9,16 +9,16 @@ import cv2
 from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
 from huggingface_hub import login
 
-#token
+# meu token
 hf_token = os.getenv("HUGGINGFACE_TOKEN")
 
 if hf_token:
     login(token=hf_token)
 
-#modelo YOLOv5
+# YOLOv5
 model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
 
-#Calcular a GLCM e o contraste
+# Calcula a GLCM e o contraste
 def calculate_glcm_contrast(image):
     gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
     max_value = gray_image.max() + 1
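Note: the body of calculate_glcm_contrast is elided between this hunk and the next, so only its first and last lines are visible. For reference, a gray-level co-occurrence matrix (GLCM) contrast score is commonly computed as in the sketch below; this is illustrative only, assumes scikit-image >= 0.19, and is not the app's own implementation (which appears to build the matrix manually with NumPy, given the max_value hint above).

# Illustrative sketch only (not the code from app.py): GLCM contrast via scikit-image.
import numpy as np
from skimage.feature import graycomatrix, graycoprops

def glcm_contrast_sketch(gray_image: np.ndarray) -> float:
    # Quantize to the observed gray-level range, mirroring the max_value hint above.
    levels = int(gray_image.max()) + 1
    glcm = graycomatrix(gray_image, distances=[1], angles=[0],
                        levels=levels, symmetric=True, normed=True)
    # Contrast = sum over (i, j) of P[i, j] * (i - j)**2
    return float(graycoprops(glcm, 'contrast')[0, 0])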
@@ -39,21 +39,26 @@ def calculate_glcm_contrast(image):
 
     return contrast
 
-#Analisar a textura e a temperatura de cor
+# Analisar a textura e a temperatura de cor
 def analyze_image_properties(image):
-    #cor (média RGB)
+    # Análise de cor (média RGB)
     image_rgb = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
     avg_color_per_row = np.average(image_rgb, axis=0)
     avg_color = np.average(avg_color_per_row, axis=0)
-    temperature = 'fria' if np.mean(avg_color) < 128 else 'quente'
+
+    # Determinar temperatura da cor
+    if avg_color[0] > avg_color[2]:  # Mais vermelho que azul
+        temperature = 'quente'
+    else:
+        temperature = 'fria'
 
-    #textura
+    # Análise de textura
     texture_contrast = calculate_glcm_contrast(image)
     texture = 'lisa' if texture_contrast < 100 else 'texturizada'
 
     return temperature, texture
 
-#Descrever imagem com BLIP
+# Descrever imagem usando BLIP
 def describe_image(image):
     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
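The main behavioural change in this hunk is the color-temperature rule: the old code labelled any image with a mean intensity below 128 as 'fria', while the new code compares the red and blue channels of the RGB mean. A small sketch of the difference, with a made-up average color for illustration:

import numpy as np

avg_color = np.array([180.0, 90.0, 60.0])  # hypothetical RGB mean: dark-ish but reddish

old_rule = 'fria' if np.mean(avg_color) < 128 else 'quente'    # mean is 110 -> 'fria'
new_rule = 'quente' if avg_color[0] > avg_color[2] else 'fria' # R > B      -> 'quente'

print(old_rule, new_rule)  # fria quente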
@@ -62,7 +67,7 @@ def describe_image(image):
     description = processor.decode(out[0], skip_special_tokens=True)
     return description
 
-#Traduz para .pt
+# Traduzir descrição para pt
 def translate_description(description):
     model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
     tokenizer = AutoTokenizer.from_pretrained(model_name)
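Most of describe_image and translate_description is elided by the diff. Below is a minimal sketch of the caption-then-translate flow, assuming the two checkpoints named above; the helper name and generation arguments are illustrative, and some Opus-MT "tc-big" checkpoints also expect a target-language tag such as >>por<< prepended to the source text.

from PIL import Image
from transformers import (BlipProcessor, BlipForConditionalGeneration,
                          AutoTokenizer, AutoModelForSeq2SeqLM)

def caption_then_translate(image: Image.Image) -> str:
    # English caption with BLIP
    blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    inputs = blip_processor(image, return_tensors="pt")
    out = blip_model.generate(**inputs)
    caption_en = blip_processor.decode(out[0], skip_special_tokens=True)

    # English -> Portuguese with Opus-MT
    mt_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
    mt_tokenizer = AutoTokenizer.from_pretrained(mt_name)
    mt_model = AutoModelForSeq2SeqLM.from_pretrained(mt_name)
    batch = mt_tokenizer(caption_en, return_tensors="pt")
    translated = mt_model.generate(**batch)
    return mt_tokenizer.decode(translated[0], skip_special_tokens=True)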
@@ -71,15 +76,12 @@ def translate_description(description):
     translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
     return translated_text
 
-#Processo
+# Processar imagem e gerar saída de voz
 def process_image(image):
-    # Detecta
+    # Detecção de objetos
     results = model(image)
     detected_image = results.render()[0]
 
-    # Análise de cor (média RGB)
-    mean_rgb = np.mean(np.array(image), axis=(0, 1))
-
     # Análise de textura e temperatura de cor
     temperature, texture = analyze_image_properties(image)
 
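This hunk also drops an unused mean_rgb computation from process_image. For context, the detection step relies on the torch.hub YOLOv5 API, where results.render() returns the annotated frames as NumPy arrays; a minimal stand-alone sketch (the file name is reused from the example referenced later in the diff):

import torch
from PIL import Image

yolo = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # downloads weights on first use
results = yolo(Image.open("example1.JPG"))               # PIL images are accepted directly
annotated = results.render()[0]                          # NumPy array with boxes drawn in place
Image.fromarray(annotated).save("detections.jpg")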
 
@@ -87,10 +89,10 @@ def process_image(image):
     description = describe_image(image)
     translated_description = translate_description(description)
 
-    # Construção
+    # Construir a descrição final
     final_description = f"{translated_description}. A textura é {texture} e a temperatura de cor é {temperature}."
 
-    # Texto2voz
+    # Texto para voz
     tts = gTTS(text=final_description, lang='pt')
     attempts = 0
     while attempts < 5:
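The retry loop that starts here (and continues in the next hunk) guards the gTTS call against transient failures. A sketch of the same pattern, assuming `from gtts import gTTS`; note that the exception class lives in the gtts.tts module, so the file's `gTTS.tts.gTTSError` reference would only resolve if the name gTTS were bound to the package rather than the class.

import time
from gtts import gTTS
from gtts.tts import gTTSError

def speak_pt(text: str, path: str = "output.mp3", max_attempts: int = 5) -> None:
    tts = gTTS(text=text, lang='pt')
    for attempt in range(max_attempts):
        try:
            tts.save(path)
            return
        except gTTSError:
            # Likely rate limiting or a transient network error; back off and retry.
            time.sleep(5)
    raise RuntimeError(f"gTTS failed after {max_attempts} attempts")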
@@ -99,19 +101,19 @@ def process_image(image):
             break
         except gTTS.tts.gTTSError as e:
             if e.r.status_code == 429:
-                print("Muitas requisicoes...")
+                print("Too many requests. Waiting before retrying...")
                 time.sleep(5)
                 attempts += 1
             else:
                 raise e
 
-    #Saída
+    # Saída
     return Image.fromarray(detected_image), final_description, "output.mp3"
 
-#
+#
 example_image_path = "example1.JPG"
 
-#
+# Gradio
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="pil"),
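The gr.Interface call is truncated here by the diff. Given that process_image returns an annotated image, a text description, and the path to output.mp3, a plausible completion looks like the sketch below; the output components, labels, and examples are assumptions, not part of the commit.

import gradio as gr

iface = gr.Interface(
    fn=process_image,                       # the function defined above in app.py
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(label="Detecções"),        # annotated YOLOv5 render
        gr.Textbox(label="Descrição"),      # translated description
        gr.Audio(label="Áudio"),            # path to output.mp3
    ],
    examples=[[example_image_path]],        # "example1.JPG"
)

if __name__ == "__main__":
    iface.launch()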
 