sachin committed on
Commit 475b0b9 · 1 Parent(s): b9d432e

test-gemma-llm

.gitignore ADDED
@@ -0,0 +1,176 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
venv_new/
NeMo/
asr_venv
*.nemo
# C extensions
*.so
*.mp3

!kannada_female.wav
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# PyPI configuration file
.pypirc
Dockerfile ADDED
@@ -0,0 +1,36 @@
FROM ubuntu:22.04
WORKDIR /app

RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    git \
    ffmpeg \
    sudo \
    wget \
    libvips \
    build-essential \
    curl \
    && ln -s /usr/bin/python3 /usr/bin/python \
    && rm -rf /var/lib/apt/lists/*

RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"

RUN pip install --upgrade pip setuptools setuptools-rust torch
COPY requirements.txt .
#RUN pip install --no-cache-dir torch==2.6.0 torchvision
#RUN pip install --no-cache-dir transformers
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install git+https://github.com/ai4bharat/IndicF5.git

COPY . .

RUN useradd -ms /bin/bash appuser \
    && chown -R appuser:appuser /app

USER appuser

EXPOSE 7860

# Use absolute path for clarity
CMD ["python", "/app/src/server/main.py", "--host", "0.0.0.0", "--port", "7860"]
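
Once the image is built and a container is running with port 7860 published, a quick way to verify the server is up is to hit the /v1/health endpoint defined in src/server/main.py below. A minimal sketch, assuming the container port is mapped 1:1 to localhost:7860:

# health-check sketch; the localhost URL assumes a 1:1 port mapping (adjust if mapped differently)
import requests

resp = requests.get("http://localhost:7860/v1/health", timeout=10)
resp.raise_for_status()
print(resp.json())  # e.g. {"status": "healthy", "model": "google/gemma-3-4b-it"}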
requirements.txt ADDED
@@ -0,0 +1,10 @@
transformers @ git+https://github.com/huggingface/[email protected]
torch
accelerate
bitsandbytes
pillow
uvicorn
fastapi
pydantic_settings
slowapi
python-multipart
src/server/auth.py ADDED
@@ -0,0 +1,21 @@
from fastapi.security import APIKeyHeader
from fastapi import HTTPException, status, Depends
from pydantic_settings import BaseSettings
from logging_config import logger

class Settings(BaseSettings):
    api_key: str

    class Config:
        env_file = ".env"

settings = Settings()

API_KEY_NAME = "X-API-Key"
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

async def get_api_key(api_key: str = Depends(api_key_header)):
    if api_key != settings.api_key:
        logger.warning(f"Failed API key attempt: {api_key}")
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key")
    logger.info("API key validated successfully")
    return api_key
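
The protected routes in src/server/main.py below take this dependency, so clients must send the key in the X-API-Key header. A hedged client-side sketch, assuming the server runs on localhost:7860 and the key is exported as the API_KEY environment variable (the same variable Settings reads from .env on the server side):

# client sketch; the endpoint and header name come from auth.py / main.py,
# while the localhost URL and API_KEY variable are assumptions for illustration
import os
import requests

headers = {"X-API-Key": os.environ["API_KEY"]}
payload = {"prompt": "Namaste", "src_lang": "eng_Latn", "tgt_lang": "kan_Knda"}
resp = requests.post("http://localhost:7860/v1/chat", json=payload, headers=headers, timeout=60)
print(resp.status_code, resp.json())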
src/server/gemma_llm.py ADDED
@@ -0,0 +1,186 @@
import torch
from logging_config import logger
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
from PIL import Image
from fastapi import HTTPException


class LLMManager:
    def __init__(self, model_name: str, device: str = "cuda" if torch.cuda.is_available() else "cpu"):
        self.model_name = model_name
        self.device = torch.device(device)
        self.torch_dtype = torch.float16 if self.device.type != "cpu" else torch.float32
        self.model = None
        self.is_loaded = False
        self.processor = None

    def unload(self):
        if self.is_loaded:
            # Delete the model and processor to free memory
            del self.model
            del self.processor
            # If using CUDA, clear the cache to free GPU memory
            if self.device.type == "cuda":
                torch.cuda.empty_cache()
            self.is_loaded = False
            logger.info(f"LLM {self.model_name} unloaded from {self.device}")

    def load(self):
        if not self.is_loaded:
            #self.model_name = "google/gemma-3-4b-it"
            self.model = Gemma3ForConditionalGeneration.from_pretrained(
                self.model_name, device_map="auto"
            ).eval()
            self.processor = AutoProcessor.from_pretrained(self.model_name)
            self.is_loaded = True
            logger.info(f"LLM {self.model_name} loaded on {self.device}")

    async def generate(self, prompt: str, max_tokens: int = 2048, temperature: float = 0.7) -> str:
        if not self.is_loaded:
            self.load()

        messages_vlm = [
            {
                "role": "system",
                "content": [{"type": "text", "text": "You are Dhwani, a helpful assistant. Answer questions considering India as the base country and Karnataka as the base state. Provide a concise response in one sentence maximum."}]
            },
            {
                "role": "user",
                "content": []
            }
        ]

        # Add the text prompt to the user content
        messages_vlm[1]["content"].append({"type": "text", "text": prompt})

        # Tokenize the chat template with the processor
        inputs_vlm = self.processor.apply_chat_template(
            messages_vlm,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt"
        ).to(self.model.device, dtype=torch.bfloat16)

        input_len = inputs_vlm["input_ids"].shape[-1]

        # Greedy decoding; temperature is currently unused because do_sample=False
        with torch.inference_mode():
            generation = self.model.generate(**inputs_vlm, max_new_tokens=max_tokens, do_sample=False)
            generation = generation[0][input_len:]

        # Decode only the newly generated tokens
        response = self.processor.decode(generation, skip_special_tokens=True)
        return response

    async def vision_query(self, image: Image.Image, query: str) -> str:
        if not self.is_loaded:
            self.load()

        messages_vlm = [
            {
                "role": "system",
                "content": [{"type": "text", "text": "You are Dhwani, a helpful assistant. Summarise your answer in max 2 lines."}]
            },
            {
                "role": "user",
                "content": []
            }
        ]

        # Add the text prompt to the user content
        messages_vlm[1]["content"].append({"type": "text", "text": query})

        # Attach the image if provided and its dimensions are valid
        if image and image.size[0] > 0 and image.size[1] > 0:
            # The image is already a PIL Image; no need to read or reopen it
            messages_vlm[1]["content"].insert(0, {"type": "image", "image": image})
            logger.info("Received valid image for processing")
        else:
            logger.info("No valid image provided, processing text only")

        # Tokenize the chat template with the processor
        try:
            inputs_vlm = self.processor.apply_chat_template(
                messages_vlm,
                add_generation_prompt=True,
                tokenize=True,
                return_dict=True,
                return_tensors="pt"
            ).to(self.model.device, dtype=torch.bfloat16)
        except Exception as e:
            logger.error(f"Error in apply_chat_template: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to process input: {str(e)}")

        input_len = inputs_vlm["input_ids"].shape[-1]

        # Generate the response
        with torch.inference_mode():
            generation = self.model.generate(**inputs_vlm, max_new_tokens=100, do_sample=False)
            generation = generation[0][input_len:]

        # Decode only the newly generated tokens
        decoded = self.processor.decode(generation, skip_special_tokens=True)
        logger.info(f"Chat Response: {decoded}")
        return decoded

    async def chat_v2(self, image: Image.Image, query: str) -> str:
        if not self.is_loaded:
            self.load()

        # Construct the message structure
        messages_vlm = [
            {
                "role": "system",
                "content": [{"type": "text", "text": "You are Dhwani, a helpful assistant. Answer questions considering India as the base country and Karnataka as the base state."}]
            },
            {
                "role": "user",
                "content": []
            }
        ]

        # Add the text prompt to the user content
        messages_vlm[1]["content"].append({"type": "text", "text": query})

        # Attach the image if provided and its dimensions are valid; the caller in
        # main.py has already read the upload and opened it as a PIL Image
        if image and image.size[0] > 0 and image.size[1] > 0:
            messages_vlm[1]["content"].insert(0, {"type": "image", "image": image})
            logger.info("Received valid image for processing")
        else:
            logger.info("No valid image provided, processing text only")

        # Tokenize the chat template with the processor
        inputs_vlm = self.processor.apply_chat_template(
            messages_vlm,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt"
        ).to(self.model.device, dtype=torch.bfloat16)

        input_len = inputs_vlm["input_ids"].shape[-1]

        # Generate the response
        with torch.inference_mode():
            generation = self.model.generate(**inputs_vlm, max_new_tokens=100, do_sample=False)
            generation = generation[0][input_len:]

        # Decode only the newly generated tokens
        decoded = self.processor.decode(generation, skip_special_tokens=True)
        logger.info(f"Chat Response: {decoded}")
        return decoded
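
For experimenting with the manager outside FastAPI, a minimal async driver might look like this. A sketch only: it assumes it is run from src/server/ so the logging_config import above resolves, and that the environment has access to the google/gemma-3-4b-it weights.

# standalone driver sketch for LLMManager
import asyncio
from gemma_llm import LLMManager

async def main():
    manager = LLMManager("google/gemma-3-4b-it")
    reply = await manager.generate("What is the capital of Karnataka?", max_tokens=100)
    print(reply)
    manager.unload()  # release GPU memory when done

asyncio.run(main())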
src/server/logging_config.py ADDED
@@ -0,0 +1,35 @@
import logging
import logging.config
from logging.handlers import RotatingFileHandler
from tts_config import config

logging_config = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "simple": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"},
    },
    "handlers": {
        "stdout": {
            "class": "logging.StreamHandler",
            "formatter": "simple",
            "stream": "ext://sys.stdout",
        },
        "file": {
            "class": "logging.handlers.RotatingFileHandler",
            "formatter": "simple",
            "filename": "dhwani_api.log",
            "maxBytes": 10 * 1024 * 1024,  # 10 MB
            "backupCount": 5,
        },
    },
    "loggers": {
        "root": {
            "level": config.log_level.upper(),
            "handlers": ["stdout", "file"],
        },
    },
}

logging.config.dictConfig(logging_config)
logger = logging.getLogger("indic_all_server")
src/server/main.py ADDED
@@ -0,0 +1,367 @@
import argparse
import io
import os
from time import time
from typing import List

import tempfile
import uvicorn
from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Body, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
from PIL import Image
from pydantic import BaseModel, field_validator
from pydantic_settings import BaseSettings
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address
import requests

from logging_config import logger
from tts_config import SPEED, ResponseFormat, config as tts_config
from gemma_llm import LLMManager
from auth import get_api_key, settings as auth_settings

# Supported language codes
SUPPORTED_LANGUAGES = {
    "asm_Beng", "kas_Arab", "pan_Guru", "ben_Beng", "kas_Deva", "san_Deva",
    "brx_Deva", "mai_Deva", "sat_Olck", "doi_Deva", "mal_Mlym", "snd_Arab",
    "eng_Latn", "mar_Deva", "snd_Deva", "gom_Deva", "mni_Beng", "tam_Taml",
    "guj_Gujr", "mni_Mtei", "tel_Telu", "hin_Deva", "npi_Deva", "urd_Arab",
    "kan_Knda", "ory_Orya"
}

class Settings(BaseSettings):
    llm_model_name: str = "google/gemma-3-4b-it"
    max_tokens: int = 512
    host: str = "0.0.0.0"
    port: int = 7860
    chat_rate_limit: str = "100/minute"
    speech_rate_limit: str = "5/minute"

    @field_validator("chat_rate_limit", "speech_rate_limit")
    def validate_rate_limit(cls, v):
        if not v.count("/") == 1 or not v.split("/")[0].isdigit():
            raise ValueError("Rate limit must be in format 'number/period' (e.g., '5/minute')")
        return v

    class Config:
        env_file = ".env"

settings = Settings()

app = FastAPI(
    title="Dhwani API",
    description="AI Chat API supporting Indian languages",
    version="1.0.0",
    redirect_slashes=False,
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
# Return 429 responses instead of unhandled exceptions when a rate limit is hit
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

llm_manager = LLMManager(settings.llm_model_name)

class ChatRequest(BaseModel):
    prompt: str
    src_lang: str = "kan_Knda"  # Default to Kannada
    tgt_lang: str = "kan_Knda"  # Default to Kannada

    @field_validator("prompt")
    def prompt_must_be_valid(cls, v):
        if len(v) > 1000:
            raise ValueError("Prompt cannot exceed 1000 characters")
        return v.strip()

    @field_validator("src_lang", "tgt_lang")
    def validate_language(cls, v):
        if v not in SUPPORTED_LANGUAGES:
            raise ValueError(f"Unsupported language code: {v}. Supported codes: {', '.join(SUPPORTED_LANGUAGES)}")
        return v

class ChatResponse(BaseModel):
    response: str

class TranslationRequest(BaseModel):
    sentences: List[str]
    src_lang: str
    tgt_lang: str

    @field_validator("src_lang", "tgt_lang")
    def validate_language(cls, v):
        if v not in SUPPORTED_LANGUAGES:
            raise ValueError(f"Unsupported language code: {v}. Supported codes: {', '.join(SUPPORTED_LANGUAGES)}")
        return v

class TranslationResponse(BaseModel):
    translations: List[str]

async def call_external_translation(sentences: List[str], src_lang: str, tgt_lang: str) -> List[str]:
    external_url = "https://gaganyatri-dhwani-server.hf.space/v1/translate"
    payload = {
        "sentences": sentences,
        "src_lang": src_lang,
        "tgt_lang": tgt_lang
    }
    try:
        response = requests.post(
            external_url,
            json=payload,
            headers={
                "accept": "application/json",
                "Content-Type": "application/json"
            },
            timeout=10
        )
        response.raise_for_status()
        translations = response.json().get("translations", [])
        if not translations or len(translations) != len(sentences):
            logger.warning(f"Unexpected response format: {response.json()}")
            raise ValueError("Invalid response from translation service")
        return translations
    except requests.Timeout:
        logger.error("Translation request timed out")
        raise HTTPException(status_code=504, detail="Translation service timeout")
    except requests.RequestException as e:
        logger.error(f"Error during translation: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
    except ValueError as e:
        logger.error(f"Invalid response: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/v1/health")
async def health_check():
    return {"status": "healthy", "model": settings.llm_model_name}

@app.get("/")
async def home():
    return RedirectResponse(url="/docs")

@app.post("/v1/unload_all_models")
async def unload_all_models(api_key: str = Depends(get_api_key)):
    try:
        logger.info("Starting to unload all models...")
        llm_manager.unload()
        logger.info("All models unloaded successfully")
        return {"status": "success", "message": "All models unloaded"}
    except Exception as e:
        logger.error(f"Error unloading models: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to unload models: {str(e)}")

@app.post("/v1/load_all_models")
async def load_all_models(api_key: str = Depends(get_api_key)):
    try:
        logger.info("Starting to load all models...")
        llm_manager.load()
        logger.info("All models loaded successfully")
        return {"status": "success", "message": "All models loaded"}
    except Exception as e:
        logger.error(f"Error loading models: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to load models: {str(e)}")

@app.post("/v1/translate", response_model=TranslationResponse)
async def translate(request: TranslationRequest):
    logger.info(f"Received translation request: {request.model_dump()}")
    try:
        translations = await call_external_translation(
            sentences=request.sentences,
            src_lang=request.src_lang,
            tgt_lang=request.tgt_lang
        )
        logger.info(f"Translation successful: {translations}")
        return TranslationResponse(translations=translations)
    except HTTPException as e:
        raise e
    except Exception as e:
        logger.error(f"Unexpected error during translation: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")

@app.post("/v1/chat", response_model=ChatResponse)
@limiter.limit(settings.chat_rate_limit)
async def chat(request: Request, chat_request: ChatRequest, api_key: str = Depends(get_api_key)):
    if not chat_request.prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    logger.info(f"Received prompt: {chat_request.prompt}, src_lang: {chat_request.src_lang}, tgt_lang: {chat_request.tgt_lang}")
    try:
        # Translate the prompt to English if src_lang is not English
        if chat_request.src_lang != "eng_Latn":
            translated_prompt = await call_external_translation(
                sentences=[chat_request.prompt],
                src_lang=chat_request.src_lang,
                tgt_lang="eng_Latn"
            )
            prompt_to_process = translated_prompt[0]
            logger.info(f"Translated prompt to English: {prompt_to_process}")
        else:
            prompt_to_process = chat_request.prompt
            logger.info("Prompt already in English, no translation needed")

        # Generate the response in English
        response = await llm_manager.generate(prompt_to_process, settings.max_tokens)
        logger.info(f"Generated English response: {response}")

        # Translate the response to the target language if tgt_lang is not English
        if chat_request.tgt_lang != "eng_Latn":
            translated_response = await call_external_translation(
                sentences=[response],
                src_lang="eng_Latn",
                tgt_lang=chat_request.tgt_lang
            )
            final_response = translated_response[0]
            logger.info(f"Translated response to {chat_request.tgt_lang}: {final_response}")
        else:
            final_response = response
            logger.info("Response kept in English, no translation needed")

        return ChatResponse(response=final_response)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing request: {str(e)}")
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")

@app.post("/v1/visual_query/")
async def visual_query(
    file: UploadFile = File(...),
    query: str = Form(...),
    src_lang: str = Query("kan_Knda", enum=list(SUPPORTED_LANGUAGES)),
    tgt_lang: str = Query("kan_Knda", enum=list(SUPPORTED_LANGUAGES)),
    api_key: str = Depends(get_api_key)
):
    try:
        image = Image.open(file.file)
        if image.size == (0, 0):
            raise HTTPException(status_code=400, detail="Uploaded image is empty or invalid")

        # Translate the query to English if src_lang is not English
        if src_lang != "eng_Latn":
            translated_query = await call_external_translation(
                sentences=[query],
                src_lang=src_lang,
                tgt_lang="eng_Latn"
            )
            query_to_process = translated_query[0]
            logger.info(f"Translated query to English: {query_to_process}")
        else:
            query_to_process = query
            logger.info("Query already in English, no translation needed")

        # Generate the answer in English
        answer = await llm_manager.vision_query(image, query_to_process)
        logger.info(f"Generated English answer: {answer}")

        # Translate the answer to the target language if tgt_lang is not English
        if tgt_lang != "eng_Latn":
            translated_answer = await call_external_translation(
                sentences=[answer],
                src_lang="eng_Latn",
                tgt_lang=tgt_lang
            )
            final_answer = translated_answer[0]
            logger.info(f"Translated answer to {tgt_lang}: {final_answer}")
        else:
            final_answer = answer
            logger.info("Answer kept in English, no translation needed")

        return {"answer": final_answer}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing request: {str(e)}")
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")

@app.post("/v1/chat_v2", response_model=ChatResponse)
@limiter.limit(settings.chat_rate_limit)
async def chat_v2(
    request: Request,
    prompt: str = Form(...),
    image: UploadFile = File(default=None),
    src_lang: str = Form("kan_Knda"),
    tgt_lang: str = Form("kan_Knda"),
    api_key: str = Depends(get_api_key)
):
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    if src_lang not in SUPPORTED_LANGUAGES or tgt_lang not in SUPPORTED_LANGUAGES:
        raise HTTPException(status_code=400, detail=f"Unsupported language code. Supported codes: {', '.join(SUPPORTED_LANGUAGES)}")

    logger.info(f"Received prompt: {prompt}, src_lang: {src_lang}, tgt_lang: {tgt_lang}, Image provided: {image is not None}")

    try:
        if image:
            image_data = await image.read()
            if not image_data:
                raise HTTPException(status_code=400, detail="Uploaded image is empty")
            img = Image.open(io.BytesIO(image_data))

            # Translate the prompt to English if src_lang is not English
            if src_lang != "eng_Latn":
                translated_prompt = await call_external_translation(
                    sentences=[prompt],
                    src_lang=src_lang,
                    tgt_lang="eng_Latn"
                )
                prompt_to_process = translated_prompt[0]
                logger.info(f"Translated prompt to English: {prompt_to_process}")
            else:
                prompt_to_process = prompt
                logger.info("Prompt already in English, no translation needed")

            decoded = await llm_manager.chat_v2(img, prompt_to_process)
            logger.info(f"Generated English response: {decoded}")

            # Translate the response to the target language if tgt_lang is not English
            if tgt_lang != "eng_Latn":
                translated_response = await call_external_translation(
                    sentences=[decoded],
                    src_lang="eng_Latn",
                    tgt_lang=tgt_lang
                )
                final_response = translated_response[0]
                logger.info(f"Translated response to {tgt_lang}: {final_response}")
            else:
                final_response = decoded
                logger.info("Response kept in English, no translation needed")
        else:
            # Translate the prompt to English if src_lang is not English
            if src_lang != "eng_Latn":
                translated_prompt = await call_external_translation(
                    sentences=[prompt],
                    src_lang=src_lang,
                    tgt_lang="eng_Latn"
                )
                prompt_to_process = translated_prompt[0]
                logger.info(f"Translated prompt to English: {prompt_to_process}")
            else:
                prompt_to_process = prompt
                logger.info("Prompt already in English, no translation needed")

            decoded = await llm_manager.generate(prompt_to_process, settings.max_tokens)
            logger.info(f"Generated English response: {decoded}")

            # Translate the response to the target language if tgt_lang is not English
            if tgt_lang != "eng_Latn":
                translated_response = await call_external_translation(
                    sentences=[decoded],
                    src_lang="eng_Latn",
                    tgt_lang=tgt_lang
                )
                final_response = translated_response[0]
                logger.info(f"Translated response to {tgt_lang}: {final_response}")
            else:
                final_response = decoded
                logger.info("Response kept in English, no translation needed")

        return ChatResponse(response=final_response)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing request: {str(e)}")
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the FastAPI server.")
    parser.add_argument("--port", type=int, default=settings.port, help="Port to run the server on.")
    parser.add_argument("--host", type=str, default=settings.host, help="Host to run the server on.")
    args = parser.parse_args()
    uvicorn.run(app, host=args.host, port=args.port)
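
For the multipart endpoints, the image goes in as a file field and the query as a form field, with the language codes as query parameters. A hedged client sketch against /v1/visual_query/, where the image path and key variable are placeholders for illustration:

# visual_query client sketch; field names and endpoint come from the route above
import os
import requests

headers = {"X-API-Key": os.environ["API_KEY"]}
with open("test.jpg", "rb") as f:  # placeholder image path
    resp = requests.post(
        "http://localhost:7860/v1/visual_query/",
        params={"src_lang": "eng_Latn", "tgt_lang": "eng_Latn"},
        files={"file": ("test.jpg", f, "image/jpeg")},
        data={"query": "What is in this picture?"},
        headers=headers,
        timeout=120,
    )
print(resp.json())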
src/server/text.py ADDED
@@ -0,0 +1,3 @@
def chunk_text(text: str, chunk_size: int = 15) -> list[str]:
    # Split text into chunks of at most chunk_size words each
    words = text.split()
    return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
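
For example, splitting a nine-word sentence into three-word chunks:

from text import chunk_text

print(chunk_text("the quick brown fox jumps over the lazy dog", chunk_size=3))
# ['the quick brown', 'fox jumps over', 'the lazy dog']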
src/server/tts_config.py ADDED
@@ -0,0 +1,27 @@
import enum
from pydantic_settings import BaseSettings

SPEED = 1.0

class StrEnum(str, enum.Enum):
    def __str__(self):
        return str(self.value)

class ResponseFormat(StrEnum):
    MP3 = "mp3"
    FLAC = "flac"
    WAV = "wav"

class Config(BaseSettings):
    log_level: str = "info"
    model: str = "ai4bharat/indic-parler-tts"
    max_models: int = 1
    lazy_load_model: bool = False  # Unused now, as all models are lazy-loaded
    input: str = "ನಿಮ್ಮ ಇನ್‌ಪುಟ್ ಪಠ್ಯವನ್ನು ಇಲ್ಲಿ ಸೇರಿಸಿ"
    voice: str = (
        "Female speaks with a high pitch at a normal pace in a clear, close-sounding environment. "
        "Her neutral tone is captured with excellent audio quality."
    )
    response_format: ResponseFormat = ResponseFormat.MP3

config = Config()
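
Because Config extends BaseSettings, any field can be overridden from the environment; pydantic-settings matches variable names case-insensitively by default. A small sketch of that behaviour:

# override sketch; the LOG_LEVEL variable name maps to Config.log_level
import os
os.environ["LOG_LEVEL"] = "debug"

from tts_config import Config
print(Config().log_level)  # -> "debug"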