Spaces: Sleeping
First commit — Browse files:
- Dockerfile (+24, −0)
- main.py (+28, −0)
Dockerfile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use an official Python image as the base image
FROM python:3.8-slim

# Set the working directory inside the container
WORKDIR /app

# Install system dependencies.
# Fix: the original `apt-get install` named no packages and had no -y flag,
# so it installed nothing. librosa's audio decoding needs libsndfile1 (and
# ffmpeg for compressed formats such as mp3/m4a). Clean the apt cache in the
# same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Install PyTorch (--no-cache-dir keeps pip's download cache out of the layer)
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir torch torchvision

# Install Hugging Face Transformers and other dependencies
RUN pip install --no-cache-dir transformers librosa deep-translator python-multipart fastapi uvicorn

# Copy the main script
COPY main.py .

# Expose the port the app runs on
EXPOSE 8000

# Set the default command to run your FastAPI app or any other server
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
main.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, UploadFile, File
from transformers import pipeline
import librosa
from deep_translator import GoogleTranslator
import io

# FastAPI application serving the /transcribe endpoint defined below.
app = FastAPI()

# Load the ASR model once at import time (heavy download/initialization) so
# every request reuses the same pipeline instance.
print("Loading Speech Recognition")
pipe = pipeline("automatic-speech-recognition", model="Akashpb13/xlsr_kurmanji_kurdish")
print("Speech Recognition Loaded")

# Shared translator: Kurdish ('ku') -> French ('fr').
# NOTE(review): GoogleTranslator calls an external web service at translate
# time — requests will fail without network access; verify deployment allows it.
print("Loading translator")
translator = GoogleTranslator(source='ku', target='fr')
print("Translator loaded")
+
def speech2text(audio_data: bytes):
    """Transcribe raw audio bytes to text with the module-level ASR pipeline.

    The bytes are decoded via librosa from an in-memory buffer and resampled
    to 16 kHz before being fed to the speech-recognition pipeline.
    """
    buffer = io.BytesIO(audio_data)
    waveform, _sample_rate = librosa.load(buffer, sr=16000)
    transcription = pipe(waveform)
    return transcription["text"]
| 23 |
+
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Accept an uploaded audio file, transcribe it, and translate the text.

    Returns a JSON object with the raw transcription under "text" and its
    translation (via the module-level GoogleTranslator) under "translation".
    """
    audio_data = await file.read()
    text_output = speech2text(audio_data)
    # Guard against empty/whitespace-only transcriptions: deep-translator
    # rejects empty payloads (raises), which would turn a silent upload into
    # an unhandled 500 response. Return an empty translation instead.
    if text_output.strip():
        translated = translator.translate(text_output)
    else:
        translated = ""
    return {"text": text_output, "translation": translated}