Spaces: Sleeping
Commit: new route
Browse files
Changed files:
- Dockerfile (+1, -1)
- main.py (+26, -6)
Dockerfile
CHANGED
@@ -19,7 +19,7 @@ RUN pip install --upgrade pip
|
|
19 |
RUN pip install torch torchvision
|
20 |
|
21 |
# Install Hugging Face Transformers and other dependencies
|
22 |
-
RUN pip install transformers librosa deep-translator python-multipart fastapi uvicorn
|
23 |
|
24 |
# Copy the main script
|
25 |
COPY --chown=user main.py .
|
|
|
19 |
RUN pip install torch torchvision
|
20 |
|
21 |
# Install Hugging Face Transformers and other dependencies
|
22 |
+
RUN pip install transformers librosa deep-translator python-multipart fastapi uvicorn sentencepiece
|
23 |
|
24 |
# Copy the main script
|
25 |
COPY --chown=user main.py .
|
main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from fastapi import FastAPI, UploadFile, File
|
2 |
from transformers import pipeline
|
3 |
import librosa
|
4 |
from deep_translator import GoogleTranslator
|
@@ -6,23 +6,43 @@ import io
|
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
12 |
|
13 |
print("Loading translator")
|
14 |
translator = GoogleTranslator(source='ku', target='fr')
|
15 |
print("Translator loaded")
|
16 |
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
def speech2text(audio_data: bytes):
|
19 |
-
audio_array, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
|
|
|
20 |
output = pipe(audio_array)
|
21 |
return output["text"]
|
22 |
|
|
|
|
|
|
|
|
|
|
|
23 |
@app.post("/transcribe")
|
24 |
async def transcribe(file: UploadFile = File(...)):
|
25 |
audio_data = await file.read()
|
26 |
text_output = speech2text(audio_data)
|
27 |
translated = translator.translate(text_output)
|
28 |
-
return {"text": text_output, "translation": translated}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, UploadFile, File, Response
|
2 |
from transformers import pipeline
|
3 |
import librosa
|
4 |
from deep_translator import GoogleTranslator
|
|
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
9 |
+
|
10 |
+
# print("Loading Speech Recognition")
|
11 |
+
|
12 |
+
# print("Speech Recognition Loaded")
|
13 |
|
14 |
print("Loading translator")
|
15 |
translator = GoogleTranslator(source='ku', target='fr')
|
16 |
print("Translator loaded")
|
17 |
|
18 |
+
# print("Loading tts")
|
19 |
+
|
20 |
+
# print("TTS loaded")
|
21 |
+
|
22 |
+
|
23 |
|
24 |
def _get_asr_pipeline():
    """Build the Kurmanji ASR pipeline once and reuse it on later calls.

    The original code called ``pipeline(...)`` inside ``speech2text``, which
    reloads the model weights on every request — the dominant cost of the
    endpoint. Caching the constructed pipeline on the function object keeps
    startup lazy (the model loads on first use) without per-call reloads.
    """
    if not hasattr(_get_asr_pipeline, "_pipe"):
        _get_asr_pipeline._pipe = pipeline(
            "automatic-speech-recognition",
            model="Akashpb13/xlsr_kurmanji_kurdish",
        )
    return _get_asr_pipeline._pipe


def speech2text(audio_data: bytes) -> str:
    """Transcribe raw audio bytes to text.

    The bytes are decoded with librosa and resampled to 16 kHz — presumably
    the rate the XLSR model expects (TODO confirm against the model card).

    Args:
        audio_data: the raw contents of an uploaded audio file.

    Returns:
        The recognized text from the ASR pipeline output.
    """
    audio_array, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
    asr = _get_asr_pipeline()
    output = asr(audio_array)
    return output["text"]
|
29 |
|
30 |
+
def _get_tts_pipeline():
    """Build the Kurmanji text-to-speech pipeline once and cache it.

    Avoids reloading the SpeechT5 weights on every synthesis request.
    """
    if not hasattr(_get_tts_pipeline, "_pipe"):
        _get_tts_pipeline._pipe = pipeline(
            "text-to-audio",
            model="roshna-omer/speecht5_tts_krd-kmr_CV17.0",
        )
    return _get_tts_pipeline._pipe


def text2speech(text: str) -> bytes:
    """Synthesize *text* and return it as WAV-encoded bytes.

    Bug fix: the pipeline's ``output["audio"]`` is a raw float waveform
    array, which the ``/transcribe_audio`` endpoint was passing straight to
    ``Response(content=..., media_type="audio/wav")`` — not a playable WAV
    stream (starlette cannot even render an ndarray as response content).
    Encode a real mono 16-bit PCM WAV container instead, using the sampling
    rate reported by the pipeline.

    Args:
        text: the text to synthesize.

    Returns:
        Bytes of a complete WAV file.
    """
    import wave  # stdlib; local import keeps the module header unchanged

    output = _get_tts_pipeline()(text)
    waveform = output["audio"]
    # Float samples are in [-1, 1]; flatten in case the pipeline yields a
    # (1, n) array, then quantize to 16-bit signed PCM.
    pcm = (waveform.reshape(-1).clip(-1.0, 1.0) * 32767.0).astype("int16")

    buf = io.BytesIO()
    with wave.open(buf, "wb") as wav:
        wav.setnchannels(1)          # mono
        wav.setsampwidth(2)          # 16-bit samples
        wav.setframerate(int(output["sampling_rate"]))
        wav.writeframes(pcm.tobytes())
    return buf.getvalue()
|
34 |
+
|
35 |
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and translate the transcript.

    Returns a JSON object with the raw transcription under ``"text"`` and
    its Kurmanji-to-French translation under ``"translation"``.
    """
    raw_bytes = await file.read()
    transcript = speech2text(raw_bytes)
    return {
        "text": transcript,
        "translation": translator.translate(transcript),
    }
|
41 |
+
|
42 |
+
@app.post("/transcribe_audio")
async def transcribe_and_return_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded recording, then speak the transcript back.

    The upload is run through speech recognition, and the recognized text
    is synthesized and returned in an ``audio/wav`` response.
    """
    uploaded = await file.read()
    spoken = text2speech(speech2text(uploaded))
    return Response(content=spoken, media_type="audio/wav")
|