sachin committed on
Commit
9781b82
·
1 Parent(s): 7b88e9b
.dockerignore ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ samples
2
+
3
+ *.log
4
+ venv
5
+ *.nemo
6
+
7
+ # Keep the sample audio file; ignore compiled Python artifacts
8
+ !kannada_female.wav
9
+ *.pyc
10
+ *.pyo
11
+ *.pyd
12
+
13
+ # Ignore all virtual environments
14
+ venv/
15
+ env/
16
+ .env/
17
+ .venv/
18
+ __pycache__/
19
+
20
+ # Ignore build artifacts
21
+ build/
22
+ dist/
23
+ *.egg-info/
24
+
25
+ # Ignore local version control files
26
+ .git/
27
+ .gitignore
28
+
29
+ # Ignore local environment files
30
+ .env
31
+
32
+ # Ignore local log files
33
+ *.log
34
+
35
+ # Ignore all node_modules
36
+ node_modules/
37
+
38
+ # Ignore all Docker-related files
39
+ Dockerfile
40
+ docker-compose.yml
41
+
42
+ # Ignore all local development files
43
+ .vscode/
44
+ .idea/
45
+ .pytest_cache/
46
+
47
+ # Ignore all test files
48
+ *.test.*
49
+ *.spec.*
50
+ *_test.*
51
+ *_spec.*
52
+
53
+ # Ignore all backup files
54
+ *.bak
55
+ *.swp
56
+ *.tmp
57
+ *.orig
58
+
59
+ # Ignore all documentation files
60
+ *.md
61
+ *.txt
62
+ *.rst
63
+
64
+ # Ignore all temporary files
65
+ *.tmp
66
+ *.temp
67
+ *.cache
68
+
69
+ # Ignore all user-specific files
70
+ *.user
71
+ *.prefs
72
+ *.rc
73
+
74
+ # Ignore all unnecessary directories and files
75
+ __pycache__
76
+ __pypackages__
77
+
78
+
79
+ !requirements.txt
80
+
81
+ #!model_requirements.txt
82
+ #!server_requirements.txt
.env.server ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ PORT=7860
2
+ HOST=0.0.0.0
3
+ SPEECH_RATE_LIMIT=5/minute
4
+ CHAT_RATE_LIMIT=100/minute
5
+ EXTERNAL_TTS_URL=https://gaganyatri-tts-indic-server.hf.space/v1/audio/speech
6
+ EXTERNAL_ASR_URL=https://gaganyatri-asr-indic-server-cpu.hf.space
7
+ EXTERNAL_TEXT_GEN_URL=https://your-text-gen-service.example.com
8
+ EXTERNAL_AUDIO_PROC_URL=https://your-audio-proc-service.example.com
9
+ API_KEY_SECRET=your_secret_key
.gitignore ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ venv_new/
6
+ NeMo/
7
+ asr_venv
8
+ *.nemo
9
+ # C extensions
10
+ *.so
11
+ *.mp3
12
+
13
+ !kannada_female.wav
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+ cover/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ # Django stuff:
64
+ *.log
65
+ local_settings.py
66
+ db.sqlite3
67
+ db.sqlite3-journal
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/_build/
78
+
79
+ # PyBuilder
80
+ .pybuilder/
81
+ target/
82
+
83
+ # Jupyter Notebook
84
+ .ipynb_checkpoints
85
+
86
+ # IPython
87
+ profile_default/
88
+ ipython_config.py
89
+
90
+ # pyenv
91
+ # For a library or package, you might want to ignore these files since the code is
92
+ # intended to run in multiple environments; otherwise, check them in:
93
+ # .python-version
94
+
95
+ # pipenv
96
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
98
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
99
+ # install all needed dependencies.
100
+ #Pipfile.lock
101
+
102
+ # UV
103
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
104
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
105
+ # commonly ignored for libraries.
106
+ #uv.lock
107
+
108
+ # poetry
109
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
110
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
111
+ # commonly ignored for libraries.
112
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
113
+ #poetry.lock
114
+
115
+ # pdm
116
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
117
+ #pdm.lock
118
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
119
+ # in version control.
120
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
121
+ .pdm.toml
122
+ .pdm-python
123
+ .pdm-build/
124
+
125
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
126
+ __pypackages__/
127
+
128
+ # Celery stuff
129
+ celerybeat-schedule
130
+ celerybeat.pid
131
+
132
+ # SageMath parsed files
133
+ *.sage.py
134
+
135
+ # Environments
136
+ .env
137
+ .venv
138
+ env/
139
+ venv/
140
+ ENV/
141
+ env.bak/
142
+ venv.bak/
143
+
144
+ # Spyder project settings
145
+ .spyderproject
146
+ .spyproject
147
+
148
+ # Rope project settings
149
+ .ropeproject
150
+
151
+ # mkdocs documentation
152
+ /site
153
+
154
+ # mypy
155
+ .mypy_cache/
156
+ .dmypy.json
157
+ dmypy.json
158
+
159
+ # Pyre type checker
160
+ .pyre/
161
+
162
+ # pytype static type analyzer
163
+ .pytype/
164
+
165
+ # Cython debug symbols
166
+ cython_debug/
167
+
168
+ # PyCharm
169
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
170
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
171
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
172
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
173
+ #.idea/
174
+
175
+ # PyPI configuration file
176
+ .pypirc
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python runtime as base image
FROM python:3.10-slim

WORKDIR /app

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install system dependencies.
# curl is needed by the HEALTHCHECK below; the slim base image does not ship it.
RUN apt-get update && apt-get install -y \
    gcc \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Run as an unprivileged user that owns the application directory
RUN useradd -ms /bin/bash appuser \
    && chown -R appuser:appuser /app

USER appuser
# Expose port from settings (7860 from your code)
EXPOSE 7860

# Healthcheck
HEALTHCHECK --interval=30s --timeout=3s \
    CMD curl -f http://localhost:7860/v1/health || exit 1

# Command to run the application with configurable host/port
# NOTE(review): this commit adds the app at src/server/main.py; confirm that
# "main:app" is importable from /app (otherwise add "--app-dir", "src/server").
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
docs/menv.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ export PORT=7860
2
+ export HOST=0.0.0.0
3
+ export SPEECH_RATE_LIMIT=5/minute
4
+ export CHAT_RATE_LIMIT=100/minute
5
+ export EXTERNAL_TTS_URL=https://gaganyatri-tts-indic-server.hf.space/v1/audio/speech
6
+ export EXTERNAL_ASR_URL=https://gaganyatri-asr-indic-server-cpu.hf.space
7
+ export EXTERNAL_TEXT_GEN_URL=https://your-text-gen-service.example.com
8
+ export EXTERNAL_AUDIO_PROC_URL=https://your-audio-proc-service.example.com
9
+ export API_KEY_SECRET=your_secret_key
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ uvicorn
2
+ fastapi
3
+ pydantic_settings
4
+ slowapi
5
+ requests
6
+ python-multipart
7
+ pillow
src/server/config/logging_config.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import logging.config
3
+ from logging.handlers import RotatingFileHandler
4
+ from .tts_config import config
5
+
6
# Logging setup for the server: every record is written both to stdout and to
# a size-rotated log file (10 MB per file, 5 backups kept).
logging_config = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "simple": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"},
    },
    "handlers": {
        "stdout": {
            "class": "logging.StreamHandler",
            "formatter": "simple",
            "stream": "ext://sys.stdout",
        },
        "file": {
            "class": "logging.handlers.RotatingFileHandler",
            "formatter": "simple",
            "filename": "dhwani_api.log",
            "maxBytes": 10 * 1024 * 1024,  # 10MB
            "backupCount": 5,
        },
    },
    # The root logger is configured through the dedicated top-level "root" key.
    # Listing it under "loggers" only reaches the real root logger on
    # Python >= 3.9; on older versions it creates an unrelated logger named
    # "root" and records from other loggers never reach the handlers.
    "root": {
        "level": config.log_level.upper(),
        "handlers": ["stdout", "file"],
    },
}

logging.config.dictConfig(logging_config)
logger = logging.getLogger("indic_all_server")
src/server/config/tts_config.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import enum
2
+ from pydantic_settings import BaseSettings
3
+
4
+ SPEED = 1.0
5
+
6
class StrEnum(str, enum.Enum):
    """Enum base whose members are strings and print as their raw value."""

    def __str__(self):
        # Render the underlying value instead of the default "Class.MEMBER".
        return f"{self.value}"


class ResponseFormat(StrEnum):
    """Audio formats that can be requested for synthesized speech."""

    MP3 = "mp3"
    FLAC = "flac"
    WAV = "wav"
14
+
15
# Default TTS-related settings, declared as a pydantic-settings model.
# ``log_level`` is read by the logging configuration and ``response_format``
# is the default format of speech requests.
class Config(BaseSettings):
    log_level: str = "info"
    model: str = "ai4bharat/indic-parler-tts"
    max_models: int = 1
    # Unused now, as all models are lazy-loaded
    lazy_load_model: bool = False
    input: str = "ನಿಮ್ಮ ಇನ್‌ಪುಟ್ ಪಠ್ಯವನ್ನು ಇಲ್ಲಿ ಸೇರಿಸಿ"
    # Free-text description of the default speaker voice.
    voice: str = (
        "Female speaks with a high pitch at a normal pace "
        "in a clear, close-sounding environment. "
        "Her neutral tone is captured with excellent audio quality."
    )
    response_format: ResponseFormat = ResponseFormat.MP3


# Module-level instance imported by the rest of the server.
config = Config()
src/server/main.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import io
from time import time
from typing import List, Optional
from abc import ABC, abstractmethod

import uvicorn
from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Form
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
from pydantic import BaseModel, Field, field_validator
from pydantic_settings import BaseSettings
from slowapi import Limiter
from slowapi import _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address
import requests
from PIL import Image

# Assuming these are in your project structure
from config.tts_config import SPEED, ResponseFormat, config as tts_config
from config.logging_config import logger
from utils.auth import get_api_key
22
+
23
+ # Configuration settings
24
class Settings(BaseSettings):
    """Server settings loaded from the environment and an optional ``.env`` file.

    Fields without a default (the external service URLs and the API key
    secret) must be supplied; pydantic-settings matches environment variables
    to field names case-insensitively, e.g. ``EXTERNAL_TTS_URL``.
    """

    # "extra": "ignore" lets this class share one .env file with other
    # settings classes; the default ("forbid") rejects any key in the file
    # that is not a field of this model.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    llm_model_name: str = "google/gemma-3-4b-it"
    max_tokens: int = 512
    host: str = "0.0.0.0"
    port: int = 7860
    chat_rate_limit: str = "100/minute"
    speech_rate_limit: str = "5/minute"
    external_tts_url: str
    external_asr_url: str
    external_text_gen_url: str
    external_audio_proc_url: str
    api_key_secret: str

    @field_validator("chat_rate_limit", "speech_rate_limit")
    @classmethod
    def validate_rate_limit(cls, v):
        """Require the ``<number>/<period>`` shape, e.g. ``5/minute``."""
        count, _, period = v.partition("/")
        if v.count("/") != 1 or not count.isdigit() or not period.strip():
            raise ValueError("Rate limit must be in format 'number/period' (e.g., '5/minute')")
        return v


settings = Settings()
48
+
49
+ # FastAPI app setup
50
# Application object; automatic trailing-slash redirects are disabled, so
# routes must be requested exactly as declared.
app = FastAPI(
    title="Dhwani API",
    description="AI Chat API supporting Indian languages",
    version="1.0.0",
    redirect_slashes=False,
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Per-client-IP rate limiting. The exception handler turns an exceeded limit
# into an HTTP 429 response; without it the error surfaces as a 500.
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
66
+
67
+ # Request/Response Models
68
# Parameters accepted by the speech-synthesis endpoint; the response format
# and speed fall back to the TTS configuration defaults.
class SpeechRequest(BaseModel):
    input: str
    voice: str
    model: str
    response_format: ResponseFormat = tts_config.response_format
    speed: float = SPEED

    @field_validator("input")
    def input_must_be_valid(cls, v):
        # The length limit applies to the raw value; whitespace is trimmed after.
        if len(v) <= 1000:
            return v.strip()
        raise ValueError("Input cannot exceed 1000 characters")

    @field_validator("response_format")
    def validate_response_format(cls, v):
        allowed = (ResponseFormat.MP3, ResponseFormat.FLAC, ResponseFormat.WAV)
        if v in allowed:
            return v
        raise ValueError(f"Response format must be one of {[fmt.value for fmt in allowed]}")
87
+
88
# Response body of /v1/transcribe/ and /v1/chat_v2.
class TranscriptionResponse(BaseModel):
    text: str


# Response body of /v1/generate_text/.
class TextGenerationResponse(BaseModel):
    text: str


# Response body of /v1/process_audio/.
class AudioProcessingResponse(BaseModel):
    result: str
96
+
97
+ # TTS Service Interface
98
class TTSService(ABC):
    """Abstract interface for a text-to-speech backend."""

    @abstractmethod
    async def generate_speech(self, payload: dict) -> requests.Response:
        """Submit ``payload`` to the backend and return its HTTP response."""
102
+
103
class ExternalTTSService(TTSService):
    """TTS backend that forwards requests to ``settings.external_tts_url``."""

    async def generate_speech(self, payload: dict) -> requests.Response:
        """POST ``payload`` as JSON to the external TTS service.

        The response is opened with ``stream=True`` so the caller can relay
        the audio in chunks; the caller is responsible for checking the HTTP
        status and closing the response.

        Raises:
            HTTPException: 504 if the request times out, 500 on any other
                ``requests`` failure.
        """
        try:
            # requests is blocking; run it in the thread pool so a slow
            # upstream does not stall the event loop for other clients.
            return await run_in_threadpool(
                requests.post,
                settings.external_tts_url,
                json=payload,
                headers={"accept": "application/json", "Content-Type": "application/json"},
                stream=True,
                timeout=10,
            )
        except requests.Timeout as e:
            raise HTTPException(status_code=504, detail="External TTS API timeout") from e
        except requests.RequestException as e:
            raise HTTPException(status_code=500, detail=f"External TTS API error: {str(e)}") from e
117
+
118
def get_tts_service() -> TTSService:
    """Dependency provider: build the TTS backend used by the endpoints."""
    service = ExternalTTSService()
    return service
120
+
121
+ # Endpoints
122
# Liveness probe: reports a fixed status plus the configured LLM model name.
@app.get("/v1/health")
async def health_check():
    status_report = {"status": "healthy", "model": settings.llm_model_name}
    return status_report
125
+
126
# Landing page: send visitors to the interactive API documentation.
@app.get("/")
async def home():
    docs_redirect = RedirectResponse(url="/docs")
    return docs_redirect
129
+
130
@app.post("/v1/audio/speech")
@limiter.limit(settings.speech_rate_limit)
async def generate_audio(
    request: Request,
    speech_request: SpeechRequest = Depends(),
    api_key: str = Depends(get_api_key),
    tts_service: TTSService = Depends(get_tts_service)
):
    """Synthesize speech through the TTS backend and stream the audio back.

    Raises:
        HTTPException: 400 if the input is empty, 500 if the backend answers
            with an error status (plus whatever the TTS service itself raises).
    """
    if not speech_request.input.strip():
        raise HTTPException(status_code=400, detail="Input cannot be empty")

    logger.info("Processing speech request", extra={
        "endpoint": "/v1/audio/speech",
        "input_length": len(speech_request.input),
        "client_ip": get_remote_address(request)
    })

    audio_format = speech_request.response_format.value
    payload = {
        "input": speech_request.input,
        "voice": speech_request.voice,
        "model": speech_request.model,
        "response_format": audio_format,
        "speed": speech_request.speed
    }

    response = await tts_service.generate_speech(payload)
    try:
        response.raise_for_status()
    except requests.HTTPError as e:
        # Report upstream error statuses the same way as transport failures
        # instead of letting the exception escape as an unhandled error.
        response.close()
        logger.error(f"External TTS API returned an error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"External TTS API error: {str(e)}") from e

    def stream_audio():
        # Release the upstream connection once the body has been relayed
        # (or the client disconnects mid-stream).
        try:
            yield from response.iter_content(chunk_size=8192)
        finally:
            response.close()

    headers = {
        "Content-Disposition": f"inline; filename=\"speech.{audio_format}\"",
        "Cache-Control": "no-cache",
        "Content-Type": f"audio/{audio_format}"
    }

    return StreamingResponse(
        stream_audio(),
        media_type=f"audio/{audio_format}",
        headers=headers
    )
169
+
170
@app.post("/v1/generate_text/", response_model=TextGenerationResponse)
@limiter.limit(settings.chat_rate_limit)
async def generate_text(
    file: UploadFile = File(...),
    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
    api_key: str = Depends(get_api_key),
    request: Request = None,
):
    """Forward an uploaded file to the external text-generation service.

    Returns the ``text`` field of the service's JSON reply (empty string if
    the field is absent).

    Raises:
        HTTPException: 504 if the service times out, 500 on any other
            ``requests`` failure.
    """
    # The key must not be "filename": that name is a built-in LogRecord
    # attribute and logging raises KeyError when ``extra`` tries to overwrite it.
    logger.info("Processing text generation request", extra={
        "endpoint": "/v1/generate_text",
        "upload_filename": file.filename,
        "client_ip": get_remote_address(request)
    })

    start_time = time()
    file_content = await file.read()
    files = {"file": (file.filename, file_content, file.content_type)}

    try:
        # requests is blocking, so run it in the thread pool; ``params`` lets
        # requests URL-encode the caller-supplied language value.
        response = await run_in_threadpool(
            requests.post,
            f"{settings.external_text_gen_url}/generate_text/",
            params={"language": language},
            files=files,
            headers={"accept": "application/json"},
            timeout=10,
        )
        response.raise_for_status()

        generated_text = response.json().get("text", "")
        logger.info(f"Text generation completed in {time() - start_time:.2f} seconds")
        return TextGenerationResponse(text=generated_text)

    except requests.Timeout as e:
        raise HTTPException(status_code=504, detail="Text generation service timeout") from e
    except requests.RequestException as e:
        logger.error(f"Text generation request failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Text generation failed: {str(e)}") from e
207
+
208
@app.post("/v1/process_audio/", response_model=AudioProcessingResponse)
@limiter.limit(settings.chat_rate_limit)
async def process_audio(
    file: UploadFile = File(...),
    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
    api_key: str = Depends(get_api_key),
    request: Request = None,
):
    """Forward an uploaded file to the external audio-processing service.

    Returns the ``result`` field of the service's JSON reply (empty string if
    the field is absent).

    Raises:
        HTTPException: 504 if the service times out, 500 on any other
            ``requests`` failure.
    """
    # The key must not be "filename": that name is a built-in LogRecord
    # attribute and logging raises KeyError when ``extra`` tries to overwrite it.
    logger.info("Processing audio processing request", extra={
        "endpoint": "/v1/process_audio",
        "upload_filename": file.filename,
        "client_ip": get_remote_address(request)
    })

    start_time = time()
    file_content = await file.read()
    files = {"file": (file.filename, file_content, file.content_type)}

    try:
        # requests is blocking, so run it in the thread pool; ``params`` lets
        # requests URL-encode the caller-supplied language value.
        response = await run_in_threadpool(
            requests.post,
            f"{settings.external_audio_proc_url}/process_audio/",
            params={"language": language},
            files=files,
            headers={"accept": "application/json"},
            timeout=10,
        )
        response.raise_for_status()

        processed_result = response.json().get("result", "")
        logger.info(f"Audio processing completed in {time() - start_time:.2f} seconds")
        return AudioProcessingResponse(result=processed_result)

    except requests.Timeout as e:
        raise HTTPException(status_code=504, detail="Audio processing service timeout") from e
    except requests.RequestException as e:
        logger.error(f"Audio processing request failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Audio processing failed: {str(e)}") from e
245
+
246
@app.post("/v1/transcribe/", response_model=TranscriptionResponse)
async def transcribe_audio(
    file: UploadFile = File(...),
    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
    # NOTE(review): API-key authentication is disabled on this endpoint and no
    # rate limit is applied - confirm that this is intentional.
    #api_key: str = Depends(get_api_key),
    request: Request = None,
):
    """Forward an uploaded audio file to the external ASR service.

    Returns the ``text`` field of the service's JSON reply (empty string if
    the field is absent).

    Raises:
        HTTPException: 504 if the service times out, 500 on any other
            ``requests`` failure.
    """
    # "upload_filename" rather than "filename": the latter is a built-in
    # LogRecord attribute and logging raises KeyError if ``extra`` overwrites it.
    logger.info("Processing transcription request", extra={
        "endpoint": "/v1/transcribe",
        "upload_filename": file.filename,
    })

    start_time = time()
    file_content = await file.read()
    files = {"file": (file.filename, file_content, file.content_type)}

    try:
        # requests is blocking, so run it in the thread pool; ``params`` lets
        # requests URL-encode the caller-supplied language value.
        response = await run_in_threadpool(
            requests.post,
            f"{settings.external_asr_url}/transcribe/",
            params={"language": language},
            files=files,
            headers={"accept": "application/json"},
            timeout=10,
        )
        response.raise_for_status()

        transcription = response.json().get("text", "")
        logger.info(f"Transcription completed in {time() - start_time:.2f} seconds")
        return TranscriptionResponse(text=transcription)

    except requests.Timeout as e:
        raise HTTPException(status_code=504, detail="Transcription service timeout") from e
    except requests.RequestException as e:
        logger.error(f"Transcription request failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}") from e
283
+
284
@app.post("/v1/chat_v2", response_model=TranscriptionResponse)
@limiter.limit(settings.chat_rate_limit)
async def chat_v2(
    request: Request,
    prompt: str = Form(...),
    image: UploadFile = File(default=None),
    api_key: str = Depends(get_api_key)
):
    """Demo chat endpoint: echo the prompt and note whether an image came with it.

    Raises:
        HTTPException: 400 if the prompt is empty, 500 if the uploaded image
            cannot be opened or any other error occurs.
    """
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    logger.info("Processing chat_v2 request", extra={
        "endpoint": "/v1/chat_v2",
        "prompt_length": len(prompt),
        "has_image": bool(image),
        "client_ip": get_remote_address(request)
    })

    try:
        # For demonstration, we'll just return the prompt as text
        image_data = None
        if image:
            # Image.open needs a path or a file-like object, so the uploaded
            # bytes are wrapped in an in-memory buffer.
            image_data = Image.open(io.BytesIO(await image.read()))
        response_text = f"Processed: {prompt}" + (" with image" if image_data is not None else "")
        return TranscriptionResponse(text=response_text)
    except Exception as e:
        logger.error(f"Chat_v2 processing failed: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e
310
+
311
if __name__ == "__main__":
    # Command-line overrides; both default to the values loaded into ``settings``.
    cli = argparse.ArgumentParser(description="Run the FastAPI server.")
    cli.add_argument("--port", type=int, default=settings.port, help="Port to run the server on.")
    cli.add_argument("--host", type=str, default=settings.host, help="Host to run the server on.")
    options = cli.parse_args()
    uvicorn.run(app, host=options.host, port=options.port)
src/server/utils/auth.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.security import APIKeyHeader
2
+ from fastapi import HTTPException, status, Depends
3
+ from pydantic_settings import BaseSettings
4
+ from config.logging_config import logger
5
+
6
class Settings(BaseSettings):
    """Authentication settings: the API key that clients must present.

    ``api_key`` has no default, so it must be provided through the
    environment or the ``.env`` file (matched case-insensitively, e.g.
    ``API_KEY``).
    """

    # "extra": "ignore" lets this class read a .env file that also holds
    # settings for other parts of the server; the default ("forbid") rejects
    # every key in the file that is not a field of this model.
    model_config = {"env_file": ".env", "extra": "ignore"}

    api_key: str


settings = Settings()
12
+
13
# Clients send their key in this request header. ``auto_error=False`` makes a
# missing header arrive as ``None`` so the dependency below decides the response.
API_KEY_NAME = "X-API-Key"
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)


async def get_api_key(api_key: str = Depends(api_key_header)):
    """Validate the ``X-API-Key`` header against the configured key.

    Returns:
        The submitted key when it matches ``settings.api_key``.

    Raises:
        HTTPException: 401 if the header is missing or does not match.
    """
    import secrets

    # Compare in constant time so response timing does not reveal how much of
    # a guessed key is correct; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    key_matches = api_key is not None and secrets.compare_digest(
        api_key.encode("utf-8"), settings.api_key.encode("utf-8")
    )
    if not key_matches:
        # The submitted value is deliberately not logged: it may be a
        # near-miss of the real key or a credential for another system.
        logger.warning("Failed API key attempt")
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key")
    logger.info("API key validated successfully")
    return api_key
src/server/utils/text.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
def chunk_text(text: str, chunk_size: int = 15) -> list[str]:
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Words within a chunk are re-joined with single spaces, so runs of
    whitespace in the input are collapsed. Empty or whitespace-only text
    yields an empty list.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    # Fail with a clear message instead of an opaque range() error for 0 or a
    # silently empty result for negative sizes.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]