rmayormartins committed on
Commit
0a40c8a
·
1 Parent(s): 8dbeec6

Uploading files377337

Files changed (1)
  1. app.py +22 -20
app.py CHANGED
@@ -9,16 +9,16 @@ import cv2
 from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
 from huggingface_hub import login
 
-#token
+# meu token
 hf_token = os.getenv("HUGGINGFACE_TOKEN")
 
 if hf_token:
     login(token=hf_token)
 
-#modelo YOLOv5
+# YOLOv5
 model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
 
-#Calcular a GLCM e o contraste
+# Calcula a GLCM e o contraste
 def calculate_glcm_contrast(image):
     gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
     max_value = gray_image.max() + 1
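Note: the body of calculate_glcm_contrast is elided between this hunk and the next, so only its first and last lines are visible. For reference, a gray-level co-occurrence matrix (GLCM) contrast score is commonly computed as in the sketch below; this is illustrative only, assumes scikit-image >= 0.19, and is not the app's own implementation (which appears to build the matrix manually with NumPy, given the max_value hint above).

# Illustrative sketch only (not the code from app.py): GLCM contrast via scikit-image.
import numpy as np
from skimage.feature import graycomatrix, graycoprops

def glcm_contrast_sketch(gray_image: np.ndarray) -> float:
    # Quantize to the observed gray-level range, mirroring the max_value hint above.
    levels = int(gray_image.max()) + 1
    glcm = graycomatrix(gray_image, distances=[1], angles=[0],
                        levels=levels, symmetric=True, normed=True)
    # Contrast = sum over (i, j) of P[i, j] * (i - j)**2
    return float(graycoprops(glcm, 'contrast')[0, 0])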
@@ -39,21 +39,26 @@ def calculate_glcm_contrast(image):
 
     return contrast
 
-#Analisar a textura e a temperatura de cor
+# Analisar a textura e a temperatura de cor
 def analyze_image_properties(image):
-    #cor (média RGB)
+    # Análise de cor (média RGB)
     image_rgb = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
     avg_color_per_row = np.average(image_rgb, axis=0)
     avg_color = np.average(avg_color_per_row, axis=0)
-    temperature = 'fria' if np.mean(avg_color) < 128 else 'quente'
+
+    # Determinar temperatura da cor
+    if avg_color[0] > avg_color[2]:  # Mais vermelho que azul
+        temperature = 'quente'
+    else:
+        temperature = 'fria'
 
-    #textura
+    # Análise de textura
     texture_contrast = calculate_glcm_contrast(image)
     texture = 'lisa' if texture_contrast < 100 else 'texturizada'
 
     return temperature, texture
 
-#Descrever imagem com BLIP
+# Descrever imagem usando BLIP
 def describe_image(image):
     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
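The main behavioural change in this hunk is the color-temperature rule: the old code labelled any image with a mean intensity below 128 as 'fria', while the new code compares the red and blue channels of the RGB mean. A small sketch of the difference, with a made-up average color for illustration:

import numpy as np

avg_color = np.array([180.0, 90.0, 60.0])  # hypothetical RGB mean: dark-ish but reddish

old_rule = 'fria' if np.mean(avg_color) < 128 else 'quente'    # mean is 110 -> 'fria'
new_rule = 'quente' if avg_color[0] > avg_color[2] else 'fria' # R > B      -> 'quente'

print(old_rule, new_rule)  # fria quente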
@@ -62,7 +67,7 @@ def describe_image(image):
     description = processor.decode(out[0], skip_special_tokens=True)
     return description
 
-#Traduz para .pt
+# Traduzir descrição para pt
 def translate_description(description):
     model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
     tokenizer = AutoTokenizer.from_pretrained(model_name)
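Most of describe_image and translate_description is elided by the diff. Below is a minimal sketch of the caption-then-translate flow, assuming the two checkpoints named above; the helper name and generation arguments are illustrative, and some Opus-MT "tc-big" checkpoints also expect a target-language tag such as >>por<< prepended to the source text.

from PIL import Image
from transformers import (BlipProcessor, BlipForConditionalGeneration,
                          AutoTokenizer, AutoModelForSeq2SeqLM)

def caption_then_translate(image: Image.Image) -> str:
    # English caption with BLIP
    blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    inputs = blip_processor(image, return_tensors="pt")
    out = blip_model.generate(**inputs)
    caption_en = blip_processor.decode(out[0], skip_special_tokens=True)

    # English -> Portuguese with Opus-MT
    mt_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
    mt_tokenizer = AutoTokenizer.from_pretrained(mt_name)
    mt_model = AutoModelForSeq2SeqLM.from_pretrained(mt_name)
    batch = mt_tokenizer(caption_en, return_tensors="pt")
    translated = mt_model.generate(**batch)
    return mt_tokenizer.decode(translated[0], skip_special_tokens=True)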
@@ -71,15 +76,12 @@ def translate_description(description):
     translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
     return translated_text
 
-#Processo
+# Processar imagem e gerar saída de voz
 def process_image(image):
-    # Detecta
+    # Detecção de objetos
     results = model(image)
     detected_image = results.render()[0]
 
-    # Análise de cor (média RGB)
-    mean_rgb = np.mean(np.array(image), axis=(0, 1))
-
     # Análise de textura e temperatura de cor
     temperature, texture = analyze_image_properties(image)
 
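This hunk also drops an unused mean_rgb computation from process_image. For context, the detection step relies on the torch.hub YOLOv5 API, where results.render() returns the annotated frames as NumPy arrays; a minimal stand-alone sketch (the file name is reused from the example referenced later in the diff):

import torch
from PIL import Image

yolo = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # downloads weights on first use
results = yolo(Image.open("example1.JPG"))               # PIL images are accepted directly
annotated = results.render()[0]                          # NumPy array with boxes drawn in place
Image.fromarray(annotated).save("detections.jpg")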
 
@@ -87,10 +89,10 @@ def process_image(image):
     description = describe_image(image)
     translated_description = translate_description(description)
 
-    # Construção
+    # Construir a descrição final
     final_description = f"{translated_description}. A textura é {texture} e a temperatura de cor é {temperature}."
 
-    # Texto2voz
+    # Texto para voz
     tts = gTTS(text=final_description, lang='pt')
     attempts = 0
     while attempts < 5:
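The retry loop that starts here (and continues in the next hunk) guards the gTTS call against transient failures. A sketch of the same pattern, assuming `from gtts import gTTS`; note that the exception class lives in the gtts.tts module, so the file's `gTTS.tts.gTTSError` reference would only resolve if the name gTTS were bound to the package rather than the class.

import time
from gtts import gTTS
from gtts.tts import gTTSError

def speak_pt(text: str, path: str = "output.mp3", max_attempts: int = 5) -> None:
    tts = gTTS(text=text, lang='pt')
    for attempt in range(max_attempts):
        try:
            tts.save(path)
            return
        except gTTSError:
            # Likely rate limiting or a transient network error; back off and retry.
            time.sleep(5)
    raise RuntimeError(f"gTTS failed after {max_attempts} attempts")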
@@ -99,19 +101,19 @@ def process_image(image):
             break
         except gTTS.tts.gTTSError as e:
             if e.r.status_code == 429:
-                print("Muitas requisicoes...")
+                print("Too many requests. Waiting before retrying...")
                 time.sleep(5)
                 attempts += 1
             else:
                 raise e
 
-    #Saída
+    # Saída
     return Image.fromarray(detected_image), final_description, "output.mp3"
 
-#
+#
 example_image_path = "example1.JPG"
 
-#
+# Gradio
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="pil"),
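The gr.Interface call is truncated here by the diff. Given that process_image returns an annotated image, a text description, and the path to output.mp3, a plausible completion looks like the sketch below; the output components, labels, and examples are assumptions, not part of the commit.

import gradio as gr

iface = gr.Interface(
    fn=process_image,                       # the function defined above in app.py
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(label="Detecções"),        # annotated YOLOv5 render
        gr.Textbox(label="Descrição"),      # translated description
        gr.Audio(label="Áudio"),            # path to output.mp3
    ],
    examples=[[example_image_path]],        # "example1.JPG"
)

if __name__ == "__main__":
    iface.launch()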
 