dhwani-server

Running on CPU Upgrade

App Files Files Community

sachin committed on Mar 15

Commit

9781b82

1 Parent(s): 7b88e9b

init-asr

Browse files

Files changed (11) hide show

.dockerignore +82 -0
.env.server +9 -0
.gitignore +176 -0
Dockerfile +34 -0
docs/menv.md +9 -0
requirements.txt +7 -0
src/server/config/logging_config.py +35 -0
src/server/config/tts_config.py +27 -0
src/server/main.py +316 -0
src/server/utils/auth.py +21 -0
src/server/utils/text.py +3 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,82 @@

+samples
+*.log
+venv
+*.nemo
+# Re-include kannada_female.wav; ignore compiled Python files
+!kannada_female.wav
+*.pyc
+*.pyo
+*.pyd
+# Ignore all virtual environments
+venv/
+env/
+.env/
+.venv/
+__pycache__/
+# Ignore build artifacts
+build/
+dist/
+*.egg-info/
+# Ignore local version control files
+.git/
+.gitignore
+# Ignore local environment files
+.env
+# Ignore local log files
+*.log
+# Ignore all node_modules
+node_modules/
+# Ignore all Docker-related files
+Dockerfile
+docker-compose.yml
+# Ignore all local development files
+.vscode/
+.idea/
+.pytest_cache/
+# Ignore all test files
+*.test.*
+*.spec.*
+*_test.*
+*_spec.*
+# Ignore all backup files
+*.bak
+*.swp
+*.tmp
+*.orig
+# Ignore all documentation files
+*.md
+*.txt
+*.rst
+# Ignore all temporary files
+*.tmp
+*.temp
+*.cache
+# Ignore all user-specific files
+*.user
+*.prefs
+*.rc
+# Ignore all unnecessary directories and files
+__pycache__
+__pypackages__
+!requirements.txt
+#!model_requirements.txt
+#!server_requirements.txt

.env.server ADDED Viewed

	@@ -0,0 +1,9 @@

+PORT=7860
+HOST=0.0.0.0
+SPEECH_RATE_LIMIT=5/minute
+CHAT_RATE_LIMIT=100/minute
+EXTERNAL_TTS_URL=https://gaganyatri-tts-indic-server.hf.space/v1/audio/speech
+EXTERNAL_ASR_URL=https://gaganyatri-asr-indic-server-cpu.hf.space
+EXTERNAL_TEXT_GEN_URL=https://your-text-gen-service.example.com
+EXTERNAL_AUDIO_PROC_URL=https://your-audio-proc-service.example.com
+API_KEY_SECRET=your_secret_key

.gitignore ADDED Viewed

	@@ -0,0 +1,176 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+venv_new/
+NeMo/
+asr_venv
+*.nemo
+# C extensions
+*.so
+*.mp3
+!kannada_female.wav
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# PyPI configuration file
+.pypirc

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+# Use official Python runtime as base image
+FROM python:3.10-slim
+WORKDIR /app
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    gcc \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+# NOTE: .dockerignore excludes .env, so the required settings (EXTERNAL_*_URL, API keys)
+# must be supplied as container environment variables at run time.
+COPY . .
+RUN useradd -ms /bin/bash appuser \
+    && chown -R appuser:appuser /app
+USER appuser
+# Expose the port the server listens on (matches --port below)
+EXPOSE 7860
+# Healthcheck: the slim base image does not ship curl, so probe with the Python
+# interpreter that is already installed. urlopen raises on connection errors and
+# HTTP error statuses, which makes the command exit non-zero.
+HEALTHCHECK --interval=30s --timeout=3s \
+  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/v1/health', timeout=2)" || exit 1
+# The application module is src/server/main.py; --app-dir puts that directory on the
+# import path so "main:app" and its sibling packages (config, utils) resolve.
+CMD ["uvicorn", "main:app", "--app-dir", "src/server", "--host", "0.0.0.0", "--port", "7860"]

docs/menv.md ADDED Viewed

	@@ -0,0 +1,9 @@

+export PORT=7860
+export HOST=0.0.0.0
+export SPEECH_RATE_LIMIT=5/minute
+export CHAT_RATE_LIMIT=100/minute
+export EXTERNAL_TTS_URL=https://gaganyatri-tts-indic-server.hf.space/v1/audio/speech
+export EXTERNAL_ASR_URL=https://gaganyatri-asr-indic-server-cpu.hf.space
+export EXTERNAL_TEXT_GEN_URL=https://your-text-gen-service.example.com
+export EXTERNAL_AUDIO_PROC_URL=https://your-audio-proc-service.example.com
+export API_KEY_SECRET=your_secret_key

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+uvicorn
+fastapi
+pydantic_settings
+slowapi
+requests
+python-multipart
+pillow

src/server/config/logging_config.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import logging
+import logging.config
+from logging.handlers import RotatingFileHandler
+from .tts_config import config
+logging_config = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "formatters": {
+        "simple": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"},
+    },
+    "handlers": {
+        "stdout": {
+            "class": "logging.StreamHandler",
+            "formatter": "simple",
+            "stream": "ext://sys.stdout",
+        },
+        "file": {
+            "class": "logging.handlers.RotatingFileHandler",
+            "formatter": "simple",
+            "filename": "dhwani_api.log",
+            "maxBytes": 10 * 1024 * 1024,  # 10MB
+            "backupCount": 5,
+        },
+    },
+    "loggers": {
+        "root": {
+            "level": config.log_level.upper(),
+            "handlers": ["stdout", "file"],
+        },
+    },
+}
+logging.config.dictConfig(logging_config)
+logger = logging.getLogger("indic_all_server")

src/server/config/tts_config.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import enum
+from pydantic_settings import BaseSettings
+# Default value for the `speed` field of speech requests (see SpeechRequest in main.py).
+SPEED = 1.0
+class StrEnum(str, enum.Enum):
+    def __str__(self):
+        return str(self.value)
+# Audio formats a speech response can be requested in; each member's value is the
+# lowercase format name that main.py forwards to the TTS service and uses in the
+# "audio/<format>" media type.
+class ResponseFormat(StrEnum):
+    MP3 = "mp3"
+    FLAC = "flac"
+    WAV = "wav"
+# Settings model (pydantic-settings BaseSettings); the values below are the defaults
+# used when nothing overrides them.
+class Config(BaseSettings):
+    # Upper-cased and used as the root logging level in logging_config.py.
+    log_level: str = "info"
+    # model, max_models, input and voice are not referenced by the other files shown in this change.
+    model: str = "ai4bharat/indic-parler-tts"
+    max_models: int = 1
+    lazy_load_model: bool = False  # Unused now, as all models are lazy-loaded
+    input: str = "ನಿಮ್ಮ ಇನ್‌ಪುಟ್ ಪಠ್ಯವನ್ನು ಇಲ್ಲಿ ಸೇರಿಸಿ"
+    voice: str = (
+        "Female speaks with a high pitch at a normal pace in a clear, close-sounding environment. "
+        "Her neutral tone is captured with excellent audio quality."
+    )
+    # Default of SpeechRequest.response_format in main.py.
+    response_format: ResponseFormat = ResponseFormat.MP3
+# Module-level instance, created at import time and imported by logging_config.py and main.py.
+config = Config()

src/server/main.py ADDED Viewed

	@@ -0,0 +1,316 @@

+# Standard library
+import argparse
+import asyncio
+import io
+from abc import ABC, abstractmethod
+from time import time
+from typing import List, Optional
+# Third-party
+import requests
+import uvicorn
+from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Form
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
+from PIL import Image
+from pydantic import BaseModel, Field, field_validator
+from pydantic_settings import BaseSettings
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.errors import RateLimitExceeded
+from slowapi.util import get_remote_address
+# Assuming these are in your project structure
+from config.tts_config import SPEED, ResponseFormat, config as tts_config
+from config.logging_config import logger
+from utils.auth import get_api_key
+# Configuration settings
+class Settings(BaseSettings):
+    llm_model_name: str = "google/gemma-3-4b-it"
+    max_tokens: int = 512
+    host: str = "0.0.0.0"
+    port: int = 7860
+    chat_rate_limit: str = "100/minute"
+    speech_rate_limit: str = "5/minute"
+    external_tts_url: str = Field(..., env="EXTERNAL_TTS_URL")
+    external_asr_url: str = Field(..., env="EXTERNAL_ASR_URL")
+    external_text_gen_url: str = Field(..., env="EXTERNAL_TEXT_GEN_URL")
+    external_audio_proc_url: str = Field(..., env="EXTERNAL_AUDIO_PROC_URL")
+    api_key_secret: str = Field(..., env="API_KEY_SECRET")
+    @field_validator("chat_rate_limit", "speech_rate_limit")
+    def validate_rate_limit(cls, v):
+        if not v.count("/") == 1 or not v.split("/")[0].isdigit():
+            raise ValueError("Rate limit must be in format 'number/period' (e.g., '5/minute')")
+        return v
+    class Config:
+        env_file = ".env"
+        env_file_encoding = "utf-8"
+settings = Settings()
+# FastAPI app setup
+app = FastAPI(
+    title="Dhwani API",
+    description="AI Chat API supporting Indian languages",
+    version="1.0.0",
+    redirect_slashes=False,
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+limiter = Limiter(key_func=get_remote_address)
+app.state.limiter = limiter
+# Request/Response Models
+class SpeechRequest(BaseModel):
+    input: str
+    voice: str
+    model: str
+    response_format: ResponseFormat = tts_config.response_format
+    speed: float = SPEED
+    @field_validator("input")
+    def input_must_be_valid(cls, v):
+        if len(v) > 1000:
+            raise ValueError("Input cannot exceed 1000 characters")
+        return v.strip()
+    @field_validator("response_format")
+    def validate_response_format(cls, v):
+        supported_formats = [ResponseFormat.MP3, ResponseFormat.FLAC, ResponseFormat.WAV]
+        if v not in supported_formats:
+            raise ValueError(f"Response format must be one of {[fmt.value for fmt in supported_formats]}")
+        return v
+# Response body with a single "text" field; used by /v1/transcribe/ and /v1/chat_v2.
+class TranscriptionResponse(BaseModel):
+    text: str
+# Response body of /v1/generate_text/: the "text" value returned by the external service.
+class TextGenerationResponse(BaseModel):
+    text: str
+# Response body of /v1/process_audio/: the "result" value returned by the external service.
+class AudioProcessingResponse(BaseModel):
+    result: str
+# TTS Service Interface
+class TTSService(ABC):
+    @abstractmethod
+    async def generate_speech(self, payload: dict) -> requests.Response:
+        pass
+class ExternalTTSService(TTSService):
+    async def generate_speech(self, payload: dict) -> requests.Response:
+        try:
+            return requests.post(
+                settings.external_tts_url,
+                json=payload,
+                headers={"accept": "application/json", "Content-Type": "application/json"},
+                stream=True,
+                timeout=10
+            )
+        except requests.Timeout:
+            raise HTTPException(status_code=504, detail="External TTS API timeout")
+        except requests.RequestException as e:
+            raise HTTPException(status_code=500, detail=f"External TTS API error: {str(e)}")
+def get_tts_service() -> TTSService:
+    return ExternalTTSService()
+# Endpoints
+@app.get("/v1/health")
+async def health_check():
+    return {"status": "healthy", "model": settings.llm_model_name}
+@app.get("/")
+async def home():
+    return RedirectResponse(url="/docs")
+@app.post("/v1/audio/speech")
+@limiter.limit(settings.speech_rate_limit)
+async def generate_audio(
+    request: Request,
+    speech_request: SpeechRequest = Depends(),
+    api_key: str = Depends(get_api_key),
+    tts_service: TTSService = Depends(get_tts_service)
+):
+    if not speech_request.input.strip():
+        raise HTTPException(status_code=400, detail="Input cannot be empty")
+    logger.info("Processing speech request", extra={
+        "endpoint": "/v1/audio/speech",
+        "input_length": len(speech_request.input),
+        "client_ip": get_remote_address(request)
+    })
+    payload = {
+        "input": speech_request.input,
+        "voice": speech_request.voice,
+        "model": speech_request.model,
+        "response_format": speech_request.response_format.value,
+        "speed": speech_request.speed
+    }
+    response = await tts_service.generate_speech(payload)
+    response.raise_for_status()
+    headers = {
+        "Content-Disposition": f"inline; filename=\"speech.{speech_request.response_format.value}\"",
+        "Cache-Control": "no-cache",
+        "Content-Type": f"audio/{speech_request.response_format.value}"
+    }
+    return StreamingResponse(
+        response.iter_content(chunk_size=8192),
+        media_type=f"audio/{speech_request.response_format.value}",
+        headers=headers
+    )
+@app.post("/v1/generate_text/", response_model=TextGenerationResponse)
+@limiter.limit(settings.chat_rate_limit)
+async def generate_text(
+    file: UploadFile = File(...),
+    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
+    api_key: str = Depends(get_api_key),
+    request: Request = None,
+):
+    logger.info("Processing text generation request", extra={
+        "endpoint": "/v1/generate_text",
+        "filename": file.filename,
+        "client_ip": get_remote_address(request)
+    })
+    start_time = time()
+    try:
+        file_content = await file.read()
+        files = {"file": (file.filename, file_content, file.content_type)}
+        external_url = f"{settings.external_text_gen_url}/generate_text/?language={language}"
+        response = requests.post(
+            external_url,
+            files=files,
+            headers={"accept": "application/json"},
+            timeout=10
+        )
+        response.raise_for_status()
+        generated_text = response.json().get("text", "")
+        logger.info(f"Text generation completed in {time() - start_time:.2f} seconds")
+        return TextGenerationResponse(text=generated_text)
+    except requests.Timeout:
+        raise HTTPException(status_code=504, detail="Text generation service timeout")
+    except requests.RequestException as e:
+        logger.error(f"Text generation request failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Text generation failed: {str(e)}")
+@app.post("/v1/process_audio/", response_model=AudioProcessingResponse)
+@limiter.limit(settings.chat_rate_limit)
+async def process_audio(
+    file: UploadFile = File(...),
+    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
+    api_key: str = Depends(get_api_key),
+    request: Request = None,
+):
+    logger.info("Processing audio processing request", extra={
+        "endpoint": "/v1/process_audio",
+        "filename": file.filename,
+        "client_ip": get_remote_address(request)
+    })
+    start_time = time()
+    try:
+        file_content = await file.read()
+        files = {"file": (file.filename, file_content, file.content_type)}
+        external_url = f"{settings.external_audio_proc_url}/process_audio/?language={language}"
+        response = requests.post(
+            external_url,
+            files=files,
+            headers={"accept": "application/json"},
+            timeout=10
+        )
+        response.raise_for_status()
+        processed_result = response.json().get("result", "")
+        logger.info(f"Audio processing completed in {time() - start_time:.2f} seconds")
+        return AudioProcessingResponse(result=processed_result)
+    except requests.Timeout:
+        raise HTTPException(status_code=504, detail="Audio processing service timeout")
+    except requests.RequestException as e:
+        logger.error(f"Audio processing request failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Audio processing failed: {str(e)}")
+@app.post("/v1/transcribe/", response_model=TranscriptionResponse)
+async def transcribe_audio(
+    file: UploadFile = File(...),
+    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
+    #api_key: str = Depends(get_api_key),
+    request: Request = None,
+):
+    '''
+    logger.info("Processing transcription request", extra={
+        "endpoint": "/v1/transcribe",
+        "filename": file.filename,
+        "client_ip": get_remote_address(request)
+    })
+    '''
+    start_time = time()
+    try:
+        file_content = await file.read()
+        files = {"file": (file.filename, file_content, file.content_type)}
+        external_url = f"{settings.external_asr_url}/transcribe/?language={language}"
+        response = requests.post(
+            external_url,
+            files=files,
+            headers={"accept": "application/json"},
+            timeout=10
+        )
+        response.raise_for_status()
+        transcription = response.json().get("text", "")
+        #logger.info(f"Transcription completed in {time() - start_time:.2f} seconds")
+        return TranscriptionResponse(text=transcription)
+    except requests.Timeout:
+        raise HTTPException(status_code=504, detail="Transcription service timeout")
+    except requests.RequestException as e:
+        #logger.error(f"Transcription request failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
+@app.post("/v1/chat_v2", response_model=TranscriptionResponse)
+@limiter.limit(settings.chat_rate_limit)
+async def chat_v2(
+    request: Request,
+    prompt: str = Form(...),
+    image: UploadFile = File(default=None),
+    api_key: str = Depends(get_api_key)
+):
+    if not prompt:
+        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
+    logger.info("Processing chat_v2 request", extra={
+        "endpoint": "/v1/chat_v2",
+        "prompt_length": len(prompt),
+        "has_image": bool(image),
+        "client_ip": get_remote_address(request)
+    })
+    try:
+        # For demonstration, we'll just return the prompt as text
+        image_data = Image.open(await image.read()) if image else None
+        response_text = f"Processed: {prompt}" + (" with image" if image_data else "")
+        return TranscriptionResponse(text=response_text)
+    except Exception as e:
+        logger.error(f"Chat_v2 processing failed: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the FastAPI server.")
+    parser.add_argument("--port", type=int, default=settings.port, help="Port to run the server on.")
+    parser.add_argument("--host", type=str, default=settings.host, help="Host to run the server on.")
+    args = parser.parse_args()
+    uvicorn.run(app, host=args.host, port=args.port)

src/server/utils/auth.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from fastapi.security import APIKeyHeader
+from fastapi import HTTPException, status, Depends
+from pydantic_settings import BaseSettings
+from config.logging_config import logger
+class Settings(BaseSettings):
+    api_key: str
+    class Config:
+        env_file = ".env"
+settings = Settings()
+# Name of the HTTP header that carries the client's API key.
+API_KEY_NAME = "X-API-Key"
+# auto_error=False: a missing header is passed to get_api_key as None rather than
+# being rejected by FastAPI itself, so get_api_key decides the response.
+api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
+async def get_api_key(api_key: str = Depends(api_key_header)):
+    if api_key != settings.api_key:
+        logger.warning(f"Failed API key attempt: {api_key}")
+        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key")
+    logger.info("API key validated successfully")
+    return api_key

src/server/utils/text.py ADDED Viewed

	@@ -0,0 +1,3 @@

+def chunk_text(text: str, chunk_size: int = 15) -> list[str]:
+    words = text.split()
+    return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]