rmayormartins committed · Commit 8dbeec6 · 1 Parent(s): 2f3c50a

Uploading files331313

Files changed (1): app.py (+16 -16)
app.py CHANGED
```diff
@@ -9,16 +9,16 @@ import cv2
 from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
 from huggingface_hub import login
 
-# Read the token from the environment variable
+# token
 hf_token = os.getenv("HUGGINGFACE_TOKEN")
 
 if hf_token:
     login(token=hf_token)
 
-# Load the YOLOv5 model
+# YOLOv5 model
 model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
 
-# Function to compute the GLCM and contrast manually
+# Compute the GLCM and contrast
 def calculate_glcm_contrast(image):
     gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
     max_value = gray_image.max() + 1
@@ -39,21 +39,21 @@ def calculate_glcm_contrast(image):
 
     return contrast
 
-# Function to analyze texture and color temperature
+# Analyze texture and color temperature
 def analyze_image_properties(image):
-    # Color analysis (RGB mean)
+    # color (RGB mean)
     image_rgb = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
     avg_color_per_row = np.average(image_rgb, axis=0)
     avg_color = np.average(avg_color_per_row, axis=0)
     temperature = 'fria' if np.mean(avg_color) < 128 else 'quente'
 
-    # Texture analysis
+    # texture
     texture_contrast = calculate_glcm_contrast(image)
     texture = 'lisa' if texture_contrast < 100 else 'texturizada'
 
     return temperature, texture
 
-# Function to describe the image using BLIP
+# Describe the image with BLIP
 def describe_image(image):
     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
@@ -62,7 +62,7 @@ def describe_image(image):
     description = processor.decode(out[0], skip_special_tokens=True)
     return description
 
-# Function to translate the description to Portuguese
+# Translate to pt
 def translate_description(description):
     model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
     tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -71,9 +71,9 @@ def translate_description(description):
     translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
     return translated_text
 
-# Main function to process the image and generate voice output
+# Process
 def process_image(image):
-    # Object detection
+    # Detect
     results = model(image)
     detected_image = results.render()[0]
 
@@ -87,10 +87,10 @@ def process_image(image):
     description = describe_image(image)
     translated_description = translate_description(description)
 
-    # Build the final description
+    # Build
     final_description = f"{translated_description}. A textura é {texture} e a temperatura de cor é {temperature}."
 
-    # Text to speech
+    # Text2voice
     tts = gTTS(text=final_description, lang='pt')
     attempts = 0
     while attempts < 5:
@@ -99,19 +99,19 @@ def process_image(image):
             break
         except gTTS.tts.gTTSError as e:
             if e.r.status_code == 429:
-                print("Too many requests. Waiting before retrying...")
+                print("Too many requests...")
                 time.sleep(5)
                 attempts += 1
             else:
                 raise e
 
-    # Return the image with detections, the description, and the audio
+    # Output
     return Image.fromarray(detected_image), final_description, "output.mp3"
 
-# Load the example image directly from the code
+#
 example_image_path = "example1.JPG"
 
-# Gradio interface
+#
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="pil"),
```
 
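The body of `calculate_glcm_contrast` (unchanged lines 26-38) is hidden by the diff; only the grayscale conversion and the `max_value` line are visible. Below is a minimal sketch of a manual GLCM contrast computation consistent with those visible lines; the horizontal pixel offset and the normalization are assumptions, and `glcm_contrast_sketch` is a hypothetical name, not the committed function.

```python
# Minimal sketch of a manual GLCM contrast, assuming a horizontal
# (right-neighbor) offset and probability normalization; the hidden
# lines 26-38 of the committed file may differ.
import numpy as np
import cv2

def glcm_contrast_sketch(image):
    gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
    max_value = gray_image.max() + 1
    glcm = np.zeros((max_value, max_value), dtype=np.float64)
    # Count co-occurrences of each pixel with its right-hand neighbor.
    rows, cols = gray_image.shape
    for i in range(rows):
        for j in range(cols - 1):
            glcm[gray_image[i, j], gray_image[i, j + 1]] += 1
    glcm /= glcm.sum()  # normalize counts into joint probabilities
    idx_i, idx_j = np.indices(glcm.shape)
    # Contrast: sum of (i - j)^2 * p(i, j); large for busy textures.
    return float(np.sum(glcm * (idx_i - idx_j) ** 2))
```

`analyze_image_properties` then thresholds this value at 100 to label the texture 'lisa' or 'texturizada'.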
 
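`describe_image` is similarly truncated: the diff jumps from loading the processor and model (lines 58-59) to decoding `out[0]` (line 62). Here is a sketch of the standard BLIP captioning call that presumably fills that gap; the intermediate variable names are assumptions. Note that the committed function reloads both `from_pretrained` objects on every call; hoisting them to module level, as below, avoids repeated loading.

```python
# Sketch of the elided middle of describe_image (lines 60-61): preprocess
# the PIL image, generate caption token ids, then decode them exactly as
# the visible line 62 does. Models are loaded once at module level.
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

def describe_image_sketch(image: Image.Image) -> str:
    inputs = processor(image, return_tensors="pt")  # pixel_values tensor
    out = blip_model.generate(**inputs)             # caption token ids
    return processor.decode(out[0], skip_special_tokens=True)
```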
 
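`translate_description` is cut between building the tokenizer (line 68) and decoding `translated[0]` (line 71). A hedged sketch of the missing generation step follows. One caveat: the model card for Helsinki-NLP/opus-mt-tc-big-en-pt shows inputs prefixed with a target-language token such as `>>por<<`; whether the committed lines add it is not visible here, so the prefix below is an assumption.

```python
# Sketch of the elided lines 69-70 of translate_description: load the
# seq2seq model and generate. The >>por<< prefix follows the model card's
# examples for this multi-target checkpoint (assumption about the commit).
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
tokenizer = AutoTokenizer.from_pretrained(model_name)
mt_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def translate_sketch(description: str) -> str:
    inputs = tokenizer(">>por<< " + description, return_tensors="pt")
    translated = mt_model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
```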
 
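`process_image` opens with the YOLOv5 detection; the unchanged lines 80-86 are hidden, so whatever happens between rendering the boxes and captioning is not visible. Below is a sketch of the torch.hub API the visible lines rely on, plus a hypothetical `detect_names` helper showing one way the hidden lines could read back class names.

```python
# The hub model accepts PIL images directly. results.render() draws the
# boxes and returns a list of numpy arrays, which is why the commit later
# wraps it in Image.fromarray. detect_names is a hypothetical helper.
import torch
from PIL import Image

model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

def detect(image):
    results = model(image)
    return Image.fromarray(results.render()[0])

def detect_names(image):
    # One detection per row; the 'name' column holds class labels.
    return model(image).pandas().xyxy[0]['name'].tolist()
```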
 
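The retry loop around `tts.save` carries two bugs visible in the context lines: `except gTTS.tts.gTTSError` treats the `gTTS` class as if it were the `gtts.tts` module (that attribute lookup raises `AttributeError`), and `e.r.status_code` is not a documented attribute of `gTTSError`. A corrected sketch, assuming the intent is to back off on rate limiting:

```python
# Corrected retry sketch: import the exception from gtts.tts instead of
# reaching through the gTTS class, and retry on any gTTSError, since the
# exception is not documented to expose the HTTP response object.
import time
from gtts import gTTS
from gtts.tts import gTTSError

def save_speech(text, path="output.mp3", retries=5):
    tts = gTTS(text=text, lang='pt')
    for _ in range(retries):
        try:
            tts.save(path)
            return path
        except gTTSError:
            print("Too many requests...")  # assumed rate limit (HTTP 429)
            time.sleep(5)
    raise RuntimeError("gTTS kept failing after %d attempts" % retries)
```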
 
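The diff stops mid-constructor, so the interface's outputs are not visible. Given that `process_image` returns an annotated PIL image, a text description, and the path "output.mp3", the completion below is a plausible reconstruction, not the committed code; wiring `example_image_path` into `examples` is likewise an assumption.

```python
# Hypothetical completion of the truncated gr.Interface call, matching
# the three values process_image returns. The committed outputs may differ.
import gradio as gr
from PIL import Image

def process_image(image):
    # Stub standing in for the committed function (see the diff above).
    return image, "descrição", "output.mp3"

iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil"),        # image with detection boxes
        gr.Textbox(),                # final Portuguese description
        gr.Audio(type="filepath"),   # path to the generated mp3
    ],
    examples=[["example1.JPG"]],     # assumption: from example_image_path
)

iface.launch()
```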