rmayormartins committed · Commit 8dbeec6 · 1 Parent(s): 2f3c50a

Uploading files331313

Files changed (1): app.py (+16 -16)
app.py CHANGED
```diff
@@ -9,16 +9,16 @@ import cv2
 from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
 from huggingface_hub import login
 
-# Read the token from the environment variable
+# token
 hf_token = os.getenv("HUGGINGFACE_TOKEN")
 
 if hf_token:
     login(token=hf_token)
 
-# Load the YOLOv5 model
+# YOLOv5 model
 model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
 
-# Function to compute the GLCM and contrast manually
+# Compute the GLCM and contrast
 def calculate_glcm_contrast(image):
     gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
     max_value = gray_image.max() + 1
@@ -39,21 +39,21 @@ def calculate_glcm_contrast(image):
 
     return contrast
 
-# Function to analyze texture and color temperature
+# Analyze texture and color temperature
 def analyze_image_properties(image):
-    # Color analysis (RGB mean)
+    # color (RGB mean)
     image_rgb = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
     avg_color_per_row = np.average(image_rgb, axis=0)
     avg_color = np.average(avg_color_per_row, axis=0)
     temperature = 'fria' if np.mean(avg_color) < 128 else 'quente'
 
-    # Texture analysis
+    # texture
     texture_contrast = calculate_glcm_contrast(image)
     texture = 'lisa' if texture_contrast < 100 else 'texturizada'
 
     return temperature, texture
 
-# Function to describe the image using BLIP
+# Describe the image with BLIP
 def describe_image(image):
     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
@@ -62,7 +62,7 @@ def describe_image(image):
     description = processor.decode(out[0], skip_special_tokens=True)
     return description
 
-# Function to translate the description to Portuguese
+# Translate to pt
 def translate_description(description):
     model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
     tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -71,9 +71,9 @@ def translate_description(description):
     translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
     return translated_text
 
-# Main function to process the image and generate voice output
+# Process
 def process_image(image):
-    # Object detection
+    # Detect
     results = model(image)
     detected_image = results.render()[0]
 
@@ -87,10 +87,10 @@ def process_image(image):
     description = describe_image(image)
     translated_description = translate_description(description)
 
-    # Build the final description
+    # Build
     final_description = f"{translated_description}. A textura é {texture} e a temperatura de cor é {temperature}."
 
-    # Text to speech
+    # Text2voice
     tts = gTTS(text=final_description, lang='pt')
     attempts = 0
     while attempts < 5:
@@ -99,19 +99,19 @@ def process_image(image):
             break
         except gTTS.tts.gTTSError as e:
             if e.r.status_code == 429:
-                print("Too many requests. Waiting before retrying...")
+                print("Too many requests...")
                 time.sleep(5)
                 attempts += 1
             else:
                 raise e
 
-    # Return the image with detections, the description, and the audio
+    # Output
     return Image.fromarray(detected_image), final_description, "output.mp3"
 
-# Load the example image directly from the code
+#
 example_image_path = "example1.JPG"
 
-# Gradio interface
+#
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="pil"),
```
 
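The body of `calculate_glcm_contrast` (unchanged lines 26-38) is hidden by the diff; only the grayscale conversion and the `max_value` line are visible. Below is a minimal sketch of a manual GLCM contrast computation consistent with those visible lines; the horizontal pixel offset and the normalization are assumptions, and `glcm_contrast_sketch` is a hypothetical name, not the committed function.

```python
# Minimal sketch of a manual GLCM contrast, assuming a horizontal
# (right-neighbor) offset and probability normalization; the hidden
# lines 26-38 of the committed file may differ.
import numpy as np
import cv2

def glcm_contrast_sketch(image):
    gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
    max_value = gray_image.max() + 1
    glcm = np.zeros((max_value, max_value), dtype=np.float64)
    # Count co-occurrences of each pixel with its right-hand neighbor.
    rows, cols = gray_image.shape
    for i in range(rows):
        for j in range(cols - 1):
            glcm[gray_image[i, j], gray_image[i, j + 1]] += 1
    glcm /= glcm.sum()  # normalize counts into joint probabilities
    idx_i, idx_j = np.indices(glcm.shape)
    # Contrast: sum of (i - j)^2 * p(i, j); large for busy textures.
    return float(np.sum(glcm * (idx_i - idx_j) ** 2))
```

`analyze_image_properties` then thresholds this value at 100 to label the texture 'lisa' or 'texturizada'.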
 
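`describe_image` is similarly truncated: the diff jumps from loading the processor and model (lines 58-59) to decoding `out[0]` (line 62). Here is a sketch of the standard BLIP captioning call that presumably fills that gap; the intermediate variable names are assumptions. Note that the committed function reloads both `from_pretrained` objects on every call; hoisting them to module level, as below, avoids repeated loading.

```python
# Sketch of the elided middle of describe_image (lines 60-61): preprocess
# the PIL image, generate caption token ids, then decode them exactly as
# the visible line 62 does. Models are loaded once at module level.
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

def describe_image_sketch(image: Image.Image) -> str:
    inputs = processor(image, return_tensors="pt")  # pixel_values tensor
    out = blip_model.generate(**inputs)             # caption token ids
    return processor.decode(out[0], skip_special_tokens=True)
```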
 
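`translate_description` is cut between building the tokenizer (line 68) and decoding `translated[0]` (line 71). A hedged sketch of the missing generation step follows. One caveat: the model card for Helsinki-NLP/opus-mt-tc-big-en-pt shows inputs prefixed with a target-language token such as `>>por<<`; whether the committed lines add it is not visible here, so the prefix below is an assumption.

```python
# Sketch of the elided lines 69-70 of translate_description: load the
# seq2seq model and generate. The >>por<< prefix follows the model card's
# examples for this multi-target checkpoint (assumption about the commit).
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
tokenizer = AutoTokenizer.from_pretrained(model_name)
mt_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def translate_sketch(description: str) -> str:
    inputs = tokenizer(">>por<< " + description, return_tensors="pt")
    translated = mt_model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
```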
 
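`process_image` opens with the YOLOv5 detection; the unchanged lines 80-86 are hidden, so whatever happens between rendering the boxes and captioning is not visible. Below is a sketch of the torch.hub API the visible lines rely on, plus a hypothetical `detect_names` helper showing one way the hidden lines could read back class names.

```python
# The hub model accepts PIL images directly. results.render() draws the
# boxes and returns a list of numpy arrays, which is why the commit later
# wraps it in Image.fromarray. detect_names is a hypothetical helper.
import torch
from PIL import Image

model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

def detect(image):
    results = model(image)
    return Image.fromarray(results.render()[0])

def detect_names(image):
    # One detection per row; the 'name' column holds class labels.
    return model(image).pandas().xyxy[0]['name'].tolist()
```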
 
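The retry loop around `tts.save` carries two bugs visible in the context lines: `except gTTS.tts.gTTSError` treats the `gTTS` class as if it were the `gtts.tts` module (that attribute lookup raises `AttributeError`), and `e.r.status_code` is not a documented attribute of `gTTSError`. A corrected sketch, assuming the intent is to back off on rate limiting:

```python
# Corrected retry sketch: import the exception from gtts.tts instead of
# reaching through the gTTS class, and retry on any gTTSError, since the
# exception is not documented to expose the HTTP response object.
import time
from gtts import gTTS
from gtts.tts import gTTSError

def save_speech(text, path="output.mp3", retries=5):
    tts = gTTS(text=text, lang='pt')
    for _ in range(retries):
        try:
            tts.save(path)
            return path
        except gTTSError:
            print("Too many requests...")  # assumed rate limit (HTTP 429)
            time.sleep(5)
    raise RuntimeError("gTTS kept failing after %d attempts" % retries)
```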
 
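The diff stops mid-constructor, so the interface's outputs are not visible. Given that `process_image` returns an annotated PIL image, a text description, and the path "output.mp3", the completion below is a plausible reconstruction, not the committed code; wiring `example_image_path` into `examples` is likewise an assumption.

```python
# Hypothetical completion of the truncated gr.Interface call, matching
# the three values process_image returns. The committed outputs may differ.
import gradio as gr
from PIL import Image

def process_image(image):
    # Stub standing in for the committed function (see the diff above).
    return image, "descrição", "output.mp3"

iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil"),        # image with detection boxes
        gr.Textbox(),                # final Portuguese description
        gr.Audio(type="filepath"),   # path to the generated mp3
    ],
    examples=[["example1.JPG"]],     # assumption: from example_image_path
)

iface.launch()
```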