Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
sachin
commited on
Commit
·
9781b82
1
Parent(s):
7b88e9b
init-asr
Browse files- .dockerignore +82 -0
- .env.server +9 -0
- .gitignore +176 -0
- Dockerfile +34 -0
- docs/menv.md +9 -0
- requirements.txt +7 -0
- src/server/config/logging_config.py +35 -0
- src/server/config/tts_config.py +27 -0
- src/server/main.py +316 -0
- src/server/utils/auth.py +21 -0
- src/server/utils/text.py +3 -0
.dockerignore
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
samples
|
2 |
+
|
3 |
+
*.log
|
4 |
+
venv
|
5 |
+
*.nemo
|
6 |
+
|
7 |
+
# Ignore compiled Python artifacts (the sample voice file kannada_female.wav stays included)
|
8 |
+
!kannada_female.wav
|
9 |
+
*.pyc
|
10 |
+
*.pyo
|
11 |
+
*.pyd
|
12 |
+
|
13 |
+
# Ignore all virtual environments
|
14 |
+
venv/
|
15 |
+
env/
|
16 |
+
.env/
|
17 |
+
.venv/
|
18 |
+
__pycache__/
|
19 |
+
|
20 |
+
# Ignore build artifacts
|
21 |
+
build/
|
22 |
+
dist/
|
23 |
+
*.egg-info/
|
24 |
+
|
25 |
+
# Ignore local version control files
|
26 |
+
.git/
|
27 |
+
.gitignore
|
28 |
+
|
29 |
+
# Ignore local environment files
|
30 |
+
.env
|
31 |
+
|
32 |
+
# Ignore local log files
|
33 |
+
*.log
|
34 |
+
|
35 |
+
# Ignore all node_modules
|
36 |
+
node_modules/
|
37 |
+
|
38 |
+
# Ignore all Docker-related files
|
39 |
+
Dockerfile
|
40 |
+
docker-compose.yml
|
41 |
+
|
42 |
+
# Ignore all local development files
|
43 |
+
.vscode/
|
44 |
+
.idea/
|
45 |
+
.pytest_cache/
|
46 |
+
|
47 |
+
# Ignore all test files
|
48 |
+
*.test.*
|
49 |
+
*.spec.*
|
50 |
+
*_test.*
|
51 |
+
*_spec.*
|
52 |
+
|
53 |
+
# Ignore all backup files
|
54 |
+
*.bak
|
55 |
+
*.swp
|
56 |
+
*.tmp
|
57 |
+
*.orig
|
58 |
+
|
59 |
+
# Ignore all documentation files
|
60 |
+
*.md
|
61 |
+
*.txt
|
62 |
+
*.rst
|
63 |
+
|
64 |
+
# Ignore all temporary files
|
65 |
+
*.tmp
|
66 |
+
*.temp
|
67 |
+
*.cache
|
68 |
+
|
69 |
+
# Ignore all user-specific files
|
70 |
+
*.user
|
71 |
+
*.prefs
|
72 |
+
*.rc
|
73 |
+
|
74 |
+
# Ignore all unnecessary directories and files
|
75 |
+
__pycache__
|
76 |
+
__pypackages__
|
77 |
+
|
78 |
+
|
79 |
+
!requirements.txt
|
80 |
+
|
81 |
+
#!model_requirements.txt
|
82 |
+
#!server_requirements.txt
|
.env.server
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PORT=7860
|
2 |
+
HOST=0.0.0.0
|
3 |
+
SPEECH_RATE_LIMIT=5/minute
|
4 |
+
CHAT_RATE_LIMIT=100/minute
|
5 |
+
EXTERNAL_TTS_URL=https://gaganyatri-tts-indic-server.hf.space/v1/audio/speech
|
6 |
+
EXTERNAL_ASR_URL=https://gaganyatri-asr-indic-server-cpu.hf.space
|
7 |
+
EXTERNAL_TEXT_GEN_URL=https://your-text-gen-service.example.com
|
8 |
+
EXTERNAL_AUDIO_PROC_URL=https://your-audio-proc-service.example.com
|
9 |
+
API_KEY_SECRET=your_secret_key
|
.gitignore
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
venv_new/
|
6 |
+
NeMo/
|
7 |
+
asr_venv
|
8 |
+
*.nemo
|
9 |
+
# C extensions
|
10 |
+
*.so
|
11 |
+
*.mp3
|
12 |
+
|
13 |
+
!kannada_female.wav
|
14 |
+
# Distribution / packaging
|
15 |
+
.Python
|
16 |
+
build/
|
17 |
+
develop-eggs/
|
18 |
+
dist/
|
19 |
+
downloads/
|
20 |
+
eggs/
|
21 |
+
.eggs/
|
22 |
+
lib/
|
23 |
+
lib64/
|
24 |
+
parts/
|
25 |
+
sdist/
|
26 |
+
var/
|
27 |
+
wheels/
|
28 |
+
share/python-wheels/
|
29 |
+
*.egg-info/
|
30 |
+
.installed.cfg
|
31 |
+
*.egg
|
32 |
+
MANIFEST
|
33 |
+
|
34 |
+
# PyInstaller
|
35 |
+
# Usually these files are written by a python script from a template
|
36 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
37 |
+
*.manifest
|
38 |
+
*.spec
|
39 |
+
|
40 |
+
# Installer logs
|
41 |
+
pip-log.txt
|
42 |
+
pip-delete-this-directory.txt
|
43 |
+
|
44 |
+
# Unit test / coverage reports
|
45 |
+
htmlcov/
|
46 |
+
.tox/
|
47 |
+
.nox/
|
48 |
+
.coverage
|
49 |
+
.coverage.*
|
50 |
+
.cache
|
51 |
+
nosetests.xml
|
52 |
+
coverage.xml
|
53 |
+
*.cover
|
54 |
+
*.py,cover
|
55 |
+
.hypothesis/
|
56 |
+
.pytest_cache/
|
57 |
+
cover/
|
58 |
+
|
59 |
+
# Translations
|
60 |
+
*.mo
|
61 |
+
*.pot
|
62 |
+
|
63 |
+
# Django stuff:
|
64 |
+
*.log
|
65 |
+
local_settings.py
|
66 |
+
db.sqlite3
|
67 |
+
db.sqlite3-journal
|
68 |
+
|
69 |
+
# Flask stuff:
|
70 |
+
instance/
|
71 |
+
.webassets-cache
|
72 |
+
|
73 |
+
# Scrapy stuff:
|
74 |
+
.scrapy
|
75 |
+
|
76 |
+
# Sphinx documentation
|
77 |
+
docs/_build/
|
78 |
+
|
79 |
+
# PyBuilder
|
80 |
+
.pybuilder/
|
81 |
+
target/
|
82 |
+
|
83 |
+
# Jupyter Notebook
|
84 |
+
.ipynb_checkpoints
|
85 |
+
|
86 |
+
# IPython
|
87 |
+
profile_default/
|
88 |
+
ipython_config.py
|
89 |
+
|
90 |
+
# pyenv
|
91 |
+
# For a library or package, you might want to ignore these files since the code is
|
92 |
+
# intended to run in multiple environments; otherwise, check them in:
|
93 |
+
# .python-version
|
94 |
+
|
95 |
+
# pipenv
|
96 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
97 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
98 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
99 |
+
# install all needed dependencies.
|
100 |
+
#Pipfile.lock
|
101 |
+
|
102 |
+
# UV
|
103 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
104 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
105 |
+
# commonly ignored for libraries.
|
106 |
+
#uv.lock
|
107 |
+
|
108 |
+
# poetry
|
109 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
110 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
111 |
+
# commonly ignored for libraries.
|
112 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
113 |
+
#poetry.lock
|
114 |
+
|
115 |
+
# pdm
|
116 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
117 |
+
#pdm.lock
|
118 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
119 |
+
# in version control.
|
120 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
121 |
+
.pdm.toml
|
122 |
+
.pdm-python
|
123 |
+
.pdm-build/
|
124 |
+
|
125 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
126 |
+
__pypackages__/
|
127 |
+
|
128 |
+
# Celery stuff
|
129 |
+
celerybeat-schedule
|
130 |
+
celerybeat.pid
|
131 |
+
|
132 |
+
# SageMath parsed files
|
133 |
+
*.sage.py
|
134 |
+
|
135 |
+
# Environments
|
136 |
+
.env
|
137 |
+
.venv
|
138 |
+
env/
|
139 |
+
venv/
|
140 |
+
ENV/
|
141 |
+
env.bak/
|
142 |
+
venv.bak/
|
143 |
+
|
144 |
+
# Spyder project settings
|
145 |
+
.spyderproject
|
146 |
+
.spyproject
|
147 |
+
|
148 |
+
# Rope project settings
|
149 |
+
.ropeproject
|
150 |
+
|
151 |
+
# mkdocs documentation
|
152 |
+
/site
|
153 |
+
|
154 |
+
# mypy
|
155 |
+
.mypy_cache/
|
156 |
+
.dmypy.json
|
157 |
+
dmypy.json
|
158 |
+
|
159 |
+
# Pyre type checker
|
160 |
+
.pyre/
|
161 |
+
|
162 |
+
# pytype static type analyzer
|
163 |
+
.pytype/
|
164 |
+
|
165 |
+
# Cython debug symbols
|
166 |
+
cython_debug/
|
167 |
+
|
168 |
+
# PyCharm
|
169 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
170 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
171 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
172 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
173 |
+
#.idea/
|
174 |
+
|
175 |
+
# PyPI configuration file
|
176 |
+
.pypirc
|
Dockerfile
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use official Python runtime as base image
FROM python:3.10-slim

WORKDIR /app

# Skip .pyc generation and keep stdout/stderr unbuffered so logs show up immediately
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is reused when only code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Run the server as an unprivileged user that owns the application directory
RUN useradd -ms /bin/bash appuser \
    && chown -R appuser:appuser /app

USER appuser

# Port the server listens on (must match the --port passed to uvicorn below)
EXPOSE 7860

# Healthcheck: the slim base image does not ship curl, so probe the health
# endpoint with the Python standard library instead of adding a package.
HEALTHCHECK --interval=30s --timeout=3s \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/v1/health', timeout=2)" || exit 1

# main.py lives in src/server (not in /app), so tell uvicorn where to import it from;
# this also makes the sibling `config` and `utils` packages importable.
CMD ["uvicorn", "main:app", "--app-dir", "src/server", "--host", "0.0.0.0", "--port", "7860"]
|
docs/menv.md
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
export PORT=7860
|
2 |
+
export HOST=0.0.0.0
|
3 |
+
export SPEECH_RATE_LIMIT=5/minute
|
4 |
+
export CHAT_RATE_LIMIT=100/minute
|
5 |
+
export EXTERNAL_TTS_URL=https://gaganyatri-tts-indic-server.hf.space/v1/audio/speech
|
6 |
+
export EXTERNAL_ASR_URL=https://gaganyatri-asr-indic-server-cpu.hf.space
|
7 |
+
export EXTERNAL_TEXT_GEN_URL=https://your-text-gen-service.example.com
|
8 |
+
export EXTERNAL_AUDIO_PROC_URL=https://your-audio-proc-service.example.com
|
9 |
+
export API_KEY_SECRET=your_secret_key
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
uvicorn
|
2 |
+
fastapi
|
3 |
+
pydantic_settings
|
4 |
+
slowapi
|
5 |
+
requests
|
6 |
+
python-multipart
|
7 |
+
pillow
|
src/server/config/logging_config.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import logging.config
|
3 |
+
from logging.handlers import RotatingFileHandler
|
4 |
+
from .tts_config import config
|
5 |
+
|
6 |
+
# Logging setup applied at import time: every record is written both to stdout
# and to a size-rotated file, using one shared single-line format.
logging_config = {
    "version": 1,
    # Keep loggers created before this module was imported (e.g. by libraries) active.
    "disable_existing_loggers": False,
    "formatters": {
        "simple": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"},
    },
    "handlers": {
        "stdout": {
            "class": "logging.StreamHandler",
            "formatter": "simple",
            "stream": "ext://sys.stdout",
        },
        "file": {
            "class": "logging.handlers.RotatingFileHandler",
            "formatter": "simple",
            "filename": "dhwani_api.log",
            "maxBytes": 10 * 1024 * 1024,  # 10MB
            "backupCount": 5,
        },
    },
    # The root logger has its own top-level key in the dictConfig schema.
    # Listing it under "loggers" as "root" only reaches the real root logger on
    # Python >= 3.9; on older versions it configures an unrelated logger that
    # happens to be named "root".
    "root": {
        "level": config.log_level.upper(),
        "handlers": ["stdout", "file"],
    },
}

logging.config.dictConfig(logging_config)

# Shared application logger; it has no handlers of its own and propagates to root.
logger = logging.getLogger("indic_all_server")
|
src/server/config/tts_config.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import enum
|
2 |
+
from pydantic_settings import BaseSettings
|
3 |
+
|
4 |
+
# Default speech speed used when a request does not specify one.
SPEED = 1.0


class StrEnum(str, enum.Enum):
    """Enum base class whose members are strings and print as their raw value."""

    def __str__(self):
        # Show the underlying value instead of the default "ClassName.MEMBER" form.
        raw_value = self.value
        return str(raw_value)
|
9 |
+
|
10 |
+
class ResponseFormat(StrEnum):
    """Audio formats that a speech response can be requested in."""

    MP3 = "mp3"
    FLAC = "flac"
    WAV = "wav"
|
14 |
+
|
15 |
+
class Config(BaseSettings):
    """Default text-to-speech settings, overridable through the environment."""

    # Level name for the root logger; upper-cased by the logging configuration.
    log_level: str = "info"
    # Identifier of the TTS model.
    model: str = "ai4bharat/indic-parler-tts"
    max_models: int = 1
    lazy_load_model: bool = False  # Unused now, as all models are lazy-loaded
    # Placeholder input text (Kannada).
    input: str = "ನಿಮ್ಮ ಇನ್ಪುಟ್ ಪಠ್ಯವನ್ನು ಇಲ್ಲಿ ಸೇರಿಸಿ"
    # Natural-language description of the default voice.
    voice: str = (
        "Female speaks with a high pitch at a normal pace in a clear, close-sounding environment. "
        "Her neutral tone is captured with excellent audio quality."
    )
    response_format: ResponseFormat = ResponseFormat.MP3


# Module-level settings instance shared by importers of this module.
config = Config()
|
src/server/main.py
ADDED
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import io
|
3 |
+
from time import time
|
4 |
+
from typing import List, Optional
|
5 |
+
from abc import ABC, abstractmethod
|
6 |
+
|
7 |
+
import uvicorn
|
8 |
+
from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Form
|
9 |
+
from fastapi.middleware.cors import CORSMiddleware
|
10 |
+
from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
|
11 |
+
from pydantic import BaseModel, Field, field_validator
|
12 |
+
from pydantic_settings import BaseSettings
|
13 |
+
from slowapi import Limiter
|
14 |
+
from slowapi.util import get_remote_address
|
15 |
+
import requests
|
16 |
+
from PIL import Image
|
17 |
+
|
18 |
+
# Assuming these are in your project structure
|
19 |
+
from config.tts_config import SPEED, ResponseFormat, config as tts_config
|
20 |
+
from config.logging_config import logger
|
21 |
+
from utils.auth import get_api_key
|
22 |
+
|
23 |
+
# Configuration settings
|
24 |
+
class Settings(BaseSettings):
    """Server configuration loaded from environment variables and ``.env``.

    Each field is populated from the environment variable with the same name
    (pydantic-settings matches names case-insensitively by default), so
    ``external_tts_url`` is read from ``EXTERNAL_TTS_URL`` and so on. Fields
    without a default are required; instantiation fails if they are missing.
    """

    llm_model_name: str = "google/gemma-3-4b-it"
    max_tokens: int = 512
    host: str = "0.0.0.0"
    port: int = 7860
    # Rate limits in "<count>/<period>" form, e.g. "5/minute".
    chat_rate_limit: str = "100/minute"
    speech_rate_limit: str = "5/minute"
    # Required settings. The former ``Field(..., env=...)`` arguments are not a
    # supported pydantic v2 option; the field name already selects the variable.
    external_tts_url: str
    external_asr_url: str
    external_text_gen_url: str
    external_audio_proc_url: str
    api_key_secret: str

    # pydantic v2 style configuration (class-based ``Config`` is deprecated).
    # ``extra="ignore"`` lets the shared .env file carry keys that belong to
    # other settings classes (e.g. the auth module) without failing validation.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    @field_validator("chat_rate_limit", "speech_rate_limit")
    @classmethod
    def validate_rate_limit(cls, v):
        """Ensure the limit looks like ``'<digits>/<period>'`` with exactly one slash."""
        count, slash, period = v.partition("/")
        well_formed = bool(slash) and "/" not in period and count.isdigit() and bool(period.strip())
        if not well_formed:
            raise ValueError("Rate limit must be in format 'number/period' (e.g., '5/minute')")
        return v


# Instantiated at import time so a misconfigured environment fails fast on startup.
settings = Settings()
|
48 |
+
|
49 |
+
# FastAPI app setup
|
50 |
+
# Rate limiter that buckets requests by the caller's remote address.
limiter = Limiter(key_func=get_remote_address)

app = FastAPI(
    title="Dhwani API",
    description="AI Chat API supporting Indian languages",
    version="1.0.0",
    redirect_slashes=False,
)
# The limiter is published on the application state alongside the app itself.
app.state.limiter = limiter

# CORS: any origin, method and header is accepted; credentials are not allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
66 |
+
|
67 |
+
# Request/Response Models
|
68 |
+
class SpeechRequest(BaseModel):
    """Parameters of a text-to-speech request."""

    input: str  # text to synthesize (at most 1000 characters before trimming)
    voice: str
    model: str
    response_format: ResponseFormat = tts_config.response_format
    speed: float = SPEED

    @field_validator("input")
    def input_must_be_valid(cls, v):
        """Reject over-long input, then return it with surrounding whitespace removed."""
        # The length limit is applied to the raw value, before stripping.
        too_long = len(v) > 1000
        if too_long:
            raise ValueError("Input cannot exceed 1000 characters")
        trimmed = v.strip()
        return trimmed

    @field_validator("response_format")
    def validate_response_format(cls, v):
        """Accept only the MP3, FLAC and WAV formats."""
        supported_formats = [ResponseFormat.MP3, ResponseFormat.FLAC, ResponseFormat.WAV]
        if v in supported_formats:
            return v
        raise ValueError(f"Response format must be one of {[fmt.value for fmt in supported_formats]}")
|
87 |
+
|
88 |
+
class TranscriptionResponse(BaseModel):
    """Response body with a single ``text`` field."""

    text: str


class TextGenerationResponse(BaseModel):
    """Response body holding generated ``text``."""

    text: str


class AudioProcessingResponse(BaseModel):
    """Response body holding the audio-processing ``result``."""

    result: str
|
96 |
+
|
97 |
+
# TTS Service Interface
|
98 |
+
class TTSService(ABC):
    """Abstract interface for a text-to-speech backend."""

    @abstractmethod
    async def generate_speech(self, payload: dict) -> requests.Response:
        """Submit *payload* to the backend and return its HTTP response."""
        ...
|
102 |
+
|
103 |
+
class ExternalTTSService(TTSService):
    """TTS backend that forwards requests to ``settings.external_tts_url``."""

    async def generate_speech(self, payload: dict) -> requests.Response:
        """POST *payload* as JSON to the external TTS API.

        The response is opened with ``stream=True``, so the body has not been
        downloaded when this returns; the caller is responsible for consuming
        and closing it.

        Raises:
            HTTPException: 504 if the request times out, 500 for any other
                ``requests`` failure.
        """
        # Local import keeps this block self-contained (asyncio is stdlib).
        import asyncio

        def _post() -> requests.Response:
            return requests.post(
                settings.external_tts_url,
                json=payload,
                headers={"accept": "application/json", "Content-Type": "application/json"},
                stream=True,
                timeout=10,
            )

        try:
            # requests is blocking; run it in a worker thread so the event loop
            # keeps serving other requests while we wait for the upstream API.
            return await asyncio.to_thread(_post)
        except requests.Timeout as exc:
            raise HTTPException(status_code=504, detail="External TTS API timeout") from exc
        except requests.RequestException as exc:
            raise HTTPException(status_code=500, detail=f"External TTS API error: {str(exc)}") from exc
|
117 |
+
|
118 |
+
def get_tts_service() -> TTSService:
    """Dependency provider: return a new external TTS service instance."""
    service = ExternalTTSService()
    return service
|
120 |
+
|
121 |
+
# Endpoints
|
122 |
+
@app.get("/v1/health")
async def health_check():
    """Report a static healthy status together with the configured LLM model name."""
    status_payload = {"status": "healthy", "model": settings.llm_model_name}
    return status_payload
|
125 |
+
|
126 |
+
@app.get("/")
async def home():
    """Redirect the root path to ``/docs``."""
    docs_redirect = RedirectResponse(url="/docs")
    return docs_redirect
|
129 |
+
|
130 |
+
@app.post("/v1/audio/speech")
@limiter.limit(settings.speech_rate_limit)
async def generate_audio(
    request: Request,
    speech_request: SpeechRequest = Depends(),
    api_key: str = Depends(get_api_key),
    tts_service: TTSService = Depends(get_tts_service),
):
    """Synthesize speech through the TTS service and stream the audio back.

    NOTE(review): with a bare ``Depends()`` FastAPI appears to read the
    ``SpeechRequest`` fields from the query string rather than a JSON body —
    confirm this is intended.

    Raises:
        HTTPException: 400 if the input is empty; 500 if the upstream TTS API
            answers with an error status; whatever ``tts_service`` raises for
            transport failures.
    """
    if not speech_request.input.strip():
        raise HTTPException(status_code=400, detail="Input cannot be empty")

    logger.info("Processing speech request", extra={
        "endpoint": "/v1/audio/speech",
        "input_length": len(speech_request.input),
        "client_ip": get_remote_address(request),
    })

    audio_format = speech_request.response_format.value
    payload = {
        "input": speech_request.input,
        "voice": speech_request.voice,
        "model": speech_request.model,
        "response_format": audio_format,
        "speed": speech_request.speed,
    }

    response = await tts_service.generate_speech(payload)
    try:
        response.raise_for_status()
    except requests.HTTPError as exc:
        # Previously this escaped as an unhandled exception (opaque 500 plus a
        # traceback) and left the streamed connection open.
        response.close()
        logger.error(f"External TTS API returned an error: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"External TTS API error: {str(exc)}") from exc

    def _relay_audio():
        # Release the upstream connection once the body has been relayed, or
        # when the client goes away and the generator is closed early.
        try:
            yield from response.iter_content(chunk_size=8192)
        finally:
            response.close()

    headers = {
        "Content-Disposition": f"inline; filename=\"speech.{audio_format}\"",
        "Cache-Control": "no-cache",
        "Content-Type": f"audio/{audio_format}",
    }

    return StreamingResponse(
        _relay_audio(),
        media_type=f"audio/{audio_format}",
        headers=headers,
    )
|
169 |
+
|
170 |
+
@app.post("/v1/generate_text/", response_model=TextGenerationResponse)
@limiter.limit(settings.chat_rate_limit)
async def generate_text(
    file: UploadFile = File(...),
    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
    api_key: str = Depends(get_api_key),
    request: Request = None,
):
    """Forward an uploaded file to the external text-generation service.

    The upload is read fully into memory and re-posted as multipart form data;
    the ``text`` field of the service's JSON reply is returned (empty string
    if the field is absent).

    Raises:
        HTTPException: 504 if the service times out, 500 for any other
            ``requests`` failure (including an error status from the service).
    """
    # Local import keeps this block self-contained (asyncio is stdlib).
    import asyncio

    # NOTE: "filename" is a reserved LogRecord attribute; passing it in `extra`
    # makes logging raise KeyError, which failed every request. Use a distinct key.
    logger.info("Processing text generation request", extra={
        "endpoint": "/v1/generate_text",
        "upload_filename": file.filename,
        "client_ip": get_remote_address(request),
    })

    start_time = time()
    try:
        file_content = await file.read()
        files = {"file": (file.filename, file_content, file.content_type)}

        def _post() -> requests.Response:
            return requests.post(
                f"{settings.external_text_gen_url}/generate_text/",
                # Let requests build (and URL-encode) the query string.
                params={"language": language},
                files=files,
                headers={"accept": "application/json"},
                timeout=10,
            )

        # requests is blocking; a worker thread keeps the event loop responsive.
        response = await asyncio.to_thread(_post)
        response.raise_for_status()

        generated_text = response.json().get("text", "")
        logger.info(f"Text generation completed in {time() - start_time:.2f} seconds")
        return TextGenerationResponse(text=generated_text)

    except requests.Timeout as exc:
        raise HTTPException(status_code=504, detail="Text generation service timeout") from exc
    except requests.RequestException as exc:
        logger.error(f"Text generation request failed: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"Text generation failed: {str(exc)}") from exc
|
207 |
+
|
208 |
+
@app.post("/v1/process_audio/", response_model=AudioProcessingResponse)
@limiter.limit(settings.chat_rate_limit)
async def process_audio(
    file: UploadFile = File(...),
    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
    api_key: str = Depends(get_api_key),
    request: Request = None,
):
    """Forward an uploaded audio file to the external audio-processing service.

    The upload is read fully into memory and re-posted as multipart form data;
    the ``result`` field of the service's JSON reply is returned (empty string
    if the field is absent).

    Raises:
        HTTPException: 504 if the service times out, 500 for any other
            ``requests`` failure (including an error status from the service).
    """
    # Local import keeps this block self-contained (asyncio is stdlib).
    import asyncio

    # NOTE: "filename" is a reserved LogRecord attribute; passing it in `extra`
    # makes logging raise KeyError, which failed every request. Use a distinct key.
    logger.info("Processing audio processing request", extra={
        "endpoint": "/v1/process_audio",
        "upload_filename": file.filename,
        "client_ip": get_remote_address(request),
    })

    start_time = time()
    try:
        file_content = await file.read()
        files = {"file": (file.filename, file_content, file.content_type)}

        def _post() -> requests.Response:
            return requests.post(
                f"{settings.external_audio_proc_url}/process_audio/",
                # Let requests build (and URL-encode) the query string.
                params={"language": language},
                files=files,
                headers={"accept": "application/json"},
                timeout=10,
            )

        # requests is blocking; a worker thread keeps the event loop responsive.
        response = await asyncio.to_thread(_post)
        response.raise_for_status()

        processed_result = response.json().get("result", "")
        logger.info(f"Audio processing completed in {time() - start_time:.2f} seconds")
        return AudioProcessingResponse(result=processed_result)

    except requests.Timeout as exc:
        raise HTTPException(status_code=504, detail="Audio processing service timeout") from exc
    except requests.RequestException as exc:
        logger.error(f"Audio processing request failed: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"Audio processing failed: {str(exc)}") from exc
|
245 |
+
|
246 |
+
@app.post("/v1/transcribe/", response_model=TranscriptionResponse)
async def transcribe_audio(
    file: UploadFile = File(...),
    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
    # NOTE(review): unlike the sibling endpoints, this one has no API-key
    # dependency and no rate limit — confirm that leaving it open is intended.
    request: Request = None,
):
    """Forward an uploaded audio file to the external ASR service.

    The upload is read fully into memory and re-posted as multipart form data;
    the ``text`` field of the service's JSON reply is returned (empty string
    if the field is absent).

    Raises:
        HTTPException: 504 if the service times out, 500 for any other
            ``requests`` failure (including an error status from the service).
    """
    # Local import keeps this block self-contained (asyncio is stdlib).
    import asyncio

    # Logging had been disabled by wrapping it in a string literal, which then
    # became this endpoint's published docstring. It is restored here with a
    # non-reserved key ("filename" collides with a LogRecord attribute).
    logger.info("Processing transcription request", extra={
        "endpoint": "/v1/transcribe",
        "upload_filename": file.filename,
        "client_ip": get_remote_address(request),
    })

    start_time = time()
    try:
        file_content = await file.read()
        files = {"file": (file.filename, file_content, file.content_type)}

        def _post() -> requests.Response:
            return requests.post(
                f"{settings.external_asr_url}/transcribe/",
                # Let requests build (and URL-encode) the query string.
                params={"language": language},
                files=files,
                headers={"accept": "application/json"},
                timeout=10,
            )

        # requests is blocking; a worker thread keeps the event loop responsive.
        response = await asyncio.to_thread(_post)
        response.raise_for_status()

        transcription = response.json().get("text", "")
        logger.info(f"Transcription completed in {time() - start_time:.2f} seconds")
        return TranscriptionResponse(text=transcription)

    except requests.Timeout as exc:
        raise HTTPException(status_code=504, detail="Transcription service timeout") from exc
    except requests.RequestException as exc:
        logger.error(f"Transcription request failed: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(exc)}") from exc
|
283 |
+
|
284 |
+
@app.post("/v1/chat_v2", response_model=TranscriptionResponse)
@limiter.limit(settings.chat_rate_limit)
async def chat_v2(
    request: Request,
    prompt: str = Form(...),
    image: UploadFile = File(default=None),
    api_key: str = Depends(get_api_key),
):
    """Demonstration chat endpoint: echo the prompt, noting an attached image.

    Raises:
        HTTPException: 400 if the prompt is empty; 500 if processing fails
            (for example when the upload cannot be opened as an image).
    """
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    logger.info("Processing chat_v2 request", extra={
        "endpoint": "/v1/chat_v2",
        "prompt_length": len(prompt),
        "has_image": bool(image),
        "client_ip": get_remote_address(request),
    })

    try:
        # For demonstration, we'll just return the prompt as text
        image_data = None
        if image:
            raw_bytes = await image.read()
            # Image.open expects a path or a file-like object; handing it the
            # raw bytes made every image upload fail, so wrap them in BytesIO.
            image_data = Image.open(io.BytesIO(raw_bytes))
        suffix = " with image" if image_data is not None else ""
        response_text = f"Processed: {prompt}" + suffix
        return TranscriptionResponse(text=response_text)
    except Exception as e:
        logger.error(f"Chat_v2 processing failed: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
310 |
+
|
311 |
+
if __name__ == "__main__":
    # Direct execution: host and port default to the loaded settings and can be
    # overridden on the command line.
    cli = argparse.ArgumentParser(description="Run the FastAPI server.")
    cli.add_argument("--port", type=int, default=settings.port, help="Port to run the server on.")
    cli.add_argument("--host", type=str, default=settings.host, help="Host to run the server on.")
    cli_args = cli.parse_args()
    uvicorn.run(app, host=cli_args.host, port=cli_args.port)
|
src/server/utils/auth.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi.security import APIKeyHeader
|
2 |
+
from fastapi import HTTPException, status, Depends
|
3 |
+
from pydantic_settings import BaseSettings
|
4 |
+
from config.logging_config import logger
|
5 |
+
|
6 |
+
class Settings(BaseSettings):
    """Authentication settings loaded from the environment and ``.env``.

    NOTE(review): this reads ``API_KEY``, while the sample environment files in
    this commit define ``API_KEY_SECRET`` — confirm which name is intended.
    """

    # Expected value of the API key header; required (no default).
    api_key: str

    # The .env file is shared with the main server settings and therefore holds
    # keys this class does not declare. pydantic-settings rejects such extra
    # dotenv entries by default, so they are ignored explicitly here.
    model_config = {"env_file": ".env", "extra": "ignore"}


# Instantiated at import time so a missing key fails fast on startup.
settings = Settings()
|
12 |
+
|
13 |
+
# Name of the HTTP header that carries the client's API key.
API_KEY_NAME = "X-API-Key"
# auto_error=False: a missing header yields None here instead of an automatic
# error response, so get_api_key() decides how to reject the request.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)


async def get_api_key(api_key: str = Depends(api_key_header)):
    """Validate the API key header and return the key on success.

    Raises:
        HTTPException: 401 if the header is missing or does not match the
            configured key.
    """
    # Local import keeps this block self-contained (secrets is stdlib).
    import secrets

    expected_key = settings.api_key
    # compare_digest takes time independent of where the values differ, which
    # avoids leaking the key through response timing.
    is_valid = api_key is not None and secrets.compare_digest(
        api_key.encode("utf-8"), expected_key.encode("utf-8")
    )
    if not is_valid:
        # Never log the submitted value: a near-miss or a key meant for another
        # service would otherwise end up in plaintext in the log files.
        logger.warning("Failed API key attempt")
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key")
    logger.info("API key validated successfully")
    return api_key
|
src/server/utils/text.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
def chunk_text(text: str, chunk_size: int = 15) -> list[str]:
    """Split *text* into chunks of at most *chunk_size* words.

    Words are separated on any whitespace and re-joined with single spaces, so
    runs of whitespace are collapsed. Empty or whitespace-only text gives ``[]``.

    Raises:
        ValueError: if *chunk_size* is less than 1. A negative size previously
            returned ``[]`` and silently dropped the whole text.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    words = text.split()
    return [" ".join(words[start:start + chunk_size]) for start in range(0, len(words), chunk_size)]
|