Nepjune committed on
Commit
ff7ab28
·
verified ·
1 Parent(s): 97599f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -10
app.py CHANGED
@@ -2,12 +2,8 @@ import gradio as gr
2
  from transformers import BlipProcessor, BlipForConditionalGeneration
3
  from gtts import gTTS
4
  from playsound import playsound
5
- from transformers import pipeline
6
 
7
- # Load the text-to-speech model
8
- tts_synthesizer = pipeline("text-to-speech", "suno/bark")
9
-
10
- # Load the image-to-text model
11
  model_id = "dblasko/blip-dalle3-img2prompt"
12
  model = BlipForConditionalGeneration.from_pretrained(model_id)
13
  processor = BlipProcessor.from_pretrained(model_id)
@@ -20,10 +16,9 @@ def generate_caption(image):
20
  generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True, temperature=0.8, top_k=40, top_p=0.9)[0]
21
 
22
  # Convert the generated caption to speech
23
- speech = tts_synthesizer(generated_caption)
24
- audio_path = "generated_audio.wav"
25
- with open(audio_path, "wb") as f:
26
- f.write(speech["audio"])
27
 
28
  return generated_caption, audio_path
29
 
@@ -37,6 +32,7 @@ demo = gr.Interface(
37
  outputs=[
38
  gr.Textbox(label="Generated caption"),
39
  gr.Button("Convert to Audio", play_audio),
40
- ]
 
41
  )
42
  demo.launch(share=True)
 
2
  from transformers import BlipProcessor, BlipForConditionalGeneration
3
  from gtts import gTTS
4
  from playsound import playsound
5
+ from concurrent.futures import ThreadPoolExecutor
6
 
 
 
 
 
7
  model_id = "dblasko/blip-dalle3-img2prompt"
8
  model = BlipForConditionalGeneration.from_pretrained(model_id)
9
  processor = BlipProcessor.from_pretrained(model_id)
 
16
  generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True, temperature=0.8, top_k=40, top_p=0.9)[0]
17
 
18
  # Convert the generated caption to speech
19
+ tts = gTTS(text=generated_caption, lang='en')
20
+ audio_path = "generated_audio.mp3"
21
+ tts.save(audio_path)
 
22
 
23
  return generated_caption, audio_path
24
 
 
32
  outputs=[
33
  gr.Textbox(label="Generated caption"),
34
  gr.Button("Convert to Audio", play_audio),
35
+ ],
36
+ live=True # Make Gradio run without blocking
37
  )
38
  demo.launch(share=True)