import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import torchaudio
from torchaudio.transforms import Resample

# Load the TTS tokenizer and model
model_name = "facebook/tts-crdnn-baker-softmax"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Convert text to speech and save the result as a WAV file
def text_to_speech(text, output_path="generated_audio.wav"):
    input_ids = tokenizer.encode(text, return_tensors="pt", max_length=150, truncation=True)
    with torch.no_grad():
        # generate() is assumed here to return the raw waveform as a tensor
        audio = model.generate(input_ids)
    # Resample from 48 kHz to 24 kHz (Resample expects a float tensor, not a numpy array)
    waveform = Resample(48_000, 24_000)(audio.squeeze().float())
    # torchaudio.save expects a 2-D (channels, samples) tensor
    torchaudio.save(output_path, waveform.unsqueeze(0), 24_000)
    return output_path

# Build the Gradio interface: textbox input, audio player output.
# The Interface's built-in submit button triggers the conversion.
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter Text"),
    outputs=gr.Audio(label="Generated Audio", type="filepath"),
)

demo.launch(share=True)
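
# Alternative sketch (assumption): the checkpoint used above may not be available
# on the Hugging Face Hub. One possible substitute is the transformers
# "text-to-speech" pipeline with a VITS checkpoint such as "facebook/mms-tts-eng";
# gr.Audio can then play a (sampling_rate, waveform) tuple directly, with no WAV
# file on disk. The function name text_to_speech_pipeline is illustrative.
#
# import numpy as np
# from transformers import pipeline
#
# tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
#
# def text_to_speech_pipeline(text):
#     out = tts(text)  # returns {"audio": np.ndarray, "sampling_rate": int}
#     return out["sampling_rate"], np.squeeze(out["audio"])
#
# demo = gr.Interface(
#     fn=text_to_speech_pipeline,
#     inputs=gr.Textbox(label="Enter Text"),
#     outputs=gr.Audio(label="Generated Audio"),
# )
# demo.launch(share=True)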