Coco-18 committed on
Commit
661887e
·
verified ·
1 Parent(s): 63e07ed

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_file
2
+ from flask_cors import CORS
3
+ import torch
4
+ import torchaudio
5
+ import soundfile as sf
6
+ import os
7
+ from transformers import VitsModel, AutoTokenizer
8
+
9
# Flask application; CORS is enabled so the Android client can call it.
app = Flask(__name__)
CORS(app)

# Sample rate used when writing WAV files, and the directory they are
# written to (/tmp because Hugging Face Spaces has limited storage).
SAMPLE_RATE = 16000
OUTPUT_DIR = "/tmp/"

# Hugging Face Hub model id for each supported language.
MODELS = {
    "kapampangan": "facebook/mms-tts-pam",
    "tagalog": "facebook/mms-tts-tgl",
    "english": "facebook/mms-tts-eng",
}

# Per-language model and tokenizer, loaded once at import time.
loaded_models = {}
loaded_processors = {}

for lang in MODELS:
    path = MODELS[lang]
    print(f"Loading {lang} model from Hugging Face: {path}...")
    loaded_models[lang] = VitsModel.from_pretrained(path)
    loaded_processors[lang] = AutoTokenizer.from_pretrained(path)
    print(f"{lang.capitalize()} model loaded successfully!")
31
+
32
@app.route("/tts", methods=["POST"])
def generate_tts():
    """Generate speech for the posted text and save it as a WAV file.

    Expects a JSON object body with:
        text: the text to synthesise (required, non-blank string).
        language: one of the keys of ``MODELS`` (case-insensitive);
            defaults to "kapampangan".

    Returns:
        A JSON response containing ``message`` and ``file_url`` on success,
        or a JSON ``error`` with status 400 when the body, language or text
        is invalid.
    """
    # silent=True returns None instead of raising when the body is missing
    # or is not valid JSON, so every bad request gets the same JSON error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text_input = data.get("text", "")
    language = data.get("language", "kapampangan")

    # Guard the type before calling .lower(): JSON may carry numbers/null.
    if not isinstance(language, str) or language.lower() not in MODELS:
        return jsonify({"error": "Invalid language. Choose 'kapampangan', 'tagalog', or 'english'."}), 400
    language = language.lower()

    if not isinstance(text_input, str) or not text_input.strip():
        return jsonify({"error": "No text provided"}), 400

    print(f"Generating speech for: '{text_input}' in {language}")

    # Select the model and tokenizer loaded for this language.
    model = loaded_models[language]
    processor = loaded_processors[language]

    # Tokenize input text.
    inputs = processor(text_input, return_tensors="pt")

    # VITS inference is a plain forward pass; the synthesised audio is the
    # `waveform` field of the model output (there is no text-generation
    # style `generate` step for this model).
    with torch.no_grad():
        output = model(**inputs).waveform

    waveform = output.cpu().numpy().flatten()

    # Prefer the rate the checkpoint declares; fall back to the module
    # default if the config does not expose one.
    sample_rate = getattr(model.config, "sampling_rate", SAMPLE_RATE)

    # Save as WAV file (one file per language, overwritten on each request).
    output_filename = os.path.join(OUTPUT_DIR, f"{language}_output.wav")
    sf.write(output_filename, waveform, sample_rate)

    return jsonify({
        "message": "TTS audio generated",
        "file_url": f"/static/{language}_output.wav"
    })
68
+
69
@app.route("/static/<filename>")
def serve_audio(filename):
    """Serve a generated WAV file from ``OUTPUT_DIR``.

    Args:
        filename: the file name taken from the URL path.

    Returns:
        The file with mimetype ``audio/wav`` when it exists, otherwise a
        JSON ``error`` with status 404.
    """
    # Only plain "*.wav" names are served: this keeps names such as ".."
    # and unrelated files that happen to live in OUTPUT_DIR unreachable.
    if os.path.basename(filename) != filename or not filename.endswith(".wav"):
        return jsonify({"error": "File not found"}), 404

    file_path = os.path.join(OUTPUT_DIR, filename)
    # isfile rather than exists, so a directory is never handed to send_file.
    if os.path.isfile(file_path):
        return send_file(file_path, mimetype="audio/wav")
    return jsonify({"error": "File not found"}), 404
76
+
77
if __name__ == "__main__":
    # The server listens on all interfaces, and Flask's debug mode enables
    # an interactive debugger that can execute code from the browser, so
    # debug is off unless explicitly requested with FLASK_DEBUG=1/true/yes.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)