Spaces:

slabstech
/

dhwani-server

Running on CPU Upgrade

App Files Files Community

sachin committed on Mar 18

Commit

445a506

1 Parent(s): 5a8554e

improve-swagger

Browse files

Files changed (2) hide show

src/server/main.py +181 -47
src/server/utils/auth.py +29 -12

src/server/main.py CHANGED Viewed

@@ -3,18 +3,21 @@ import io
 from time import time
 from typing import List, Optional
 from abc import ABC, abstractmethod
 import uvicorn
 from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Form
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
-from pydantic import BaseModel, Field, field_validator
 from slowapi import Limiter
 from slowapi.util import get_remote_address
 import requests
 from PIL import Image
-from utils.auth import get_current_user, login, refresh_token, TokenResponse, Settings, LoginRequest
 # Assuming these are in your project structure
 from config.tts_config import SPEED, ResponseFormat, config as tts_config
@@ -22,13 +25,14 @@ from config.logging_config import logger
 settings = Settings()
-# FastAPI app setup
 app = FastAPI(
     title="Dhwani API",
-    description="AI Chat API supporting Indian languages",
     version="1.0.0",
     redirect_slashes=False,
 )
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -37,16 +41,16 @@ app.add_middleware(
     allow_headers=["*"],
 )
-limiter = Limiter(key_func=get_remote_address)
-app.state.limiter = limiter
 # Request/Response Models
 class SpeechRequest(BaseModel):
-    input: str
-    voice: str
-    model: str
-    response_format: ResponseFormat = tts_config.response_format
-    speed: float = SPEED
     @field_validator("input")
     def input_must_be_valid(cls, v):
@@ -61,14 +65,34 @@ class SpeechRequest(BaseModel):
             raise ValueError(f"Response format must be one of {[fmt.value for fmt in supported_formats]}")
         return v
 class TranscriptionResponse(BaseModel):
-    text: str
 class TextGenerationResponse(BaseModel):
-    text: str
 class AudioProcessingResponse(BaseModel):
-    result: str
 # TTS Service Interface
 class TTSService(ABC):
@@ -94,26 +118,68 @@ class ExternalTTSService(TTSService):
 def get_tts_service() -> TTSService:
     return ExternalTTSService()
-@app.post("/v1/token", response_model=TokenResponse)
 async def token(login_request: LoginRequest):
     return await login(login_request)
-@app.post("/v1/refresh", response_model=TokenResponse)
 async def refresh(token_response: TokenResponse = Depends(refresh_token)):
     return token_response
-@app.get("/v1/health")
-async def health_check():
-    return {"status": "healthy", "model": settings.llm_model_name}
-@app.get("/")
-async def home():
-    return RedirectResponse(url="/docs")
-@app.post("/v1/audio/speech")
 @limiter.limit(settings.speech_rate_limit)
 async def generate_audio(
     request: Request,
@@ -155,8 +221,8 @@ async def generate_audio(
     )
 class ChatRequest(BaseModel):
-    prompt: str
-    src_lang: str = "kan_Knda"
     @field_validator("prompt")
     def prompt_must_be_valid(cls, v):
@@ -164,10 +230,31 @@ class ChatRequest(BaseModel):
             raise ValueError("Prompt cannot exceed 1000 characters")
         return v.strip()
-class ChatResponse(BaseModel):
-    response: str
-@app.post("/v1/chat", response_model=ChatResponse)
 @limiter.limit(settings.chat_rate_limit)
 async def chat(
     request: Request,
@@ -212,13 +299,22 @@ async def chat(
         logger.error(f"Error processing request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
-@app.post("/v1/process_audio/", response_model=AudioProcessingResponse)
 @limiter.limit(settings.chat_rate_limit)
 async def process_audio(
-    file: UploadFile = File(...),
-    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
     user_id: str = Depends(get_current_user),
-    request: Request = None,
 ):
     logger.info("Processing audio processing request", extra={
         "endpoint": "/v1/process_audio",
@@ -251,10 +347,18 @@ async def process_audio(
         logger.error(f"Audio processing request failed: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Audio processing failed: {str(e)}")
-@app.post("/v1/transcribe/", response_model=TranscriptionResponse)
 async def transcribe_audio(
-    file: UploadFile = File(...),
-    language: str = Query(..., enum=["kannada", "hindi", "tamil"]),
     user_id: str = Depends(get_current_user),
     request: Request = None,
 ):
@@ -280,12 +384,21 @@ async def transcribe_audio(
     except requests.RequestException as e:
         raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
-@app.post("/v1/chat_v2", response_model=TranscriptionResponse)
 @limiter.limit(settings.chat_rate_limit)
 async def chat_v2(
     request: Request,
-    prompt: str = Form(...),
-    image: UploadFile = File(default=None),
     user_id: str = Depends(get_current_user)
 ):
     if not prompt:
@@ -308,14 +421,35 @@ async def chat_v2(
         raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 class TranslationRequest(BaseModel):
-    sentences: list[str]
-    src_lang: str
-    tgt_lang: str
 class TranslationResponse(BaseModel):
-    translations: list[str]
-@app.post("/v1/translate", response_model=TranslationResponse)
 async def translate(
     request: TranslationRequest,
     user_id: str = Depends(get_current_user)

 from time import time
 from typing import List, Optional
 from abc import ABC, abstractmethod
+from pydantic import BaseModel, Field
 import uvicorn
 from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Form
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
+from pydantic import BaseModel, field_validator
 from slowapi import Limiter
 from slowapi.util import get_remote_address
 import requests
 from PIL import Image
+# Import from auth.py
+from utils.auth import get_current_user, login, refresh_token, register, TokenResponse, Settings, LoginRequest, RegisterRequest
 # Assuming these are in your project structure
 from config.tts_config import SPEED, ResponseFormat, config as tts_config
 settings = Settings()
+# FastAPI app setup with enhanced docs
 app = FastAPI(
     title="Dhwani API",
+    description="A multilingual AI-powered API supporting Indian languages for chat, text-to-speech, audio processing, and transcription.",
     version="1.0.0",
     redirect_slashes=False,
 )
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# Rate limiting keyed by client IP address.
+# `get_current_user` is a coroutine function that expects a bearer token, so it
+# cannot be called from slowapi's synchronous key_func: doing so yields an
+# un-awaited coroutine object instead of a stable per-user key.
+# NOTE(review): per-user limiting needs a synchronous token decoder; none is
+# visible in this file, so the key falls back to the remote address.
+limiter = Limiter(key_func=get_remote_address)
+# slowapi's exception handler and middleware look the limiter up on app.state.
+app.state.limiter = limiter
 # Request/Response Models
 class SpeechRequest(BaseModel):
+    input: str = Field(..., description="Text to convert to speech (max 1000 characters)")
+    voice: str = Field(..., description="Voice identifier for the TTS service")
+    model: str = Field(..., description="TTS model to use")
+    response_format: ResponseFormat = Field(tts_config.response_format, description="Audio format: mp3, flac, or wav")
+    speed: float = Field(SPEED, description="Speech speed (default: 1.0)")
     @field_validator("input")
     def input_must_be_valid(cls, v):
             raise ValueError(f"Response format must be one of {[fmt.value for fmt in supported_formats]}")
         return v
+    # Pydantic v2 (this file imports `field_validator`) reads schema extras from
+    # `model_config["json_schema_extra"]`; the v1 `Config.schema_extra` key is
+    # not applied, so the example would not reach the generated OpenAPI schema.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "input": "Hello, how are you?",
+                "voice": "female-1",
+                "model": "tts-model-1",
+                "response_format": "mp3",
+                "speed": 1.0,
+            }
+        }
+    }
 class TranscriptionResponse(BaseModel):
+    # Response body declared as response_model for /v1/transcribe/ and /v1/chat_v2.
+    text: str = Field(..., description="Transcribed text from the audio")
+    # Pydantic v2 takes schema extras from `json_schema_extra`; v1's
+    # `Config.schema_extra` is not applied.
+    model_config = {"json_schema_extra": {"example": {"text": "Hello, how are you?"}}}
 class TextGenerationResponse(BaseModel):
+    # Response body carrying a single generated text string.
+    text: str = Field(..., description="Generated text response")
+    # Pydantic v2 takes schema extras from `json_schema_extra`; v1's
+    # `Config.schema_extra` is not applied.
+    model_config = {"json_schema_extra": {"example": {"text": "Hi there, I'm doing great!"}}}
 class AudioProcessingResponse(BaseModel):
+    # Response body declared as response_model for /v1/process_audio/.
+    result: str = Field(..., description="Processed audio result")
+    # Pydantic v2 takes schema extras from `json_schema_extra`; v1's
+    # `Config.schema_extra` is not applied.
+    model_config = {"json_schema_extra": {"example": {"result": "Processed audio output"}}}
 # TTS Service Interface
 class TTSService(ABC):
 def get_tts_service() -> TTSService:
+    """Return a new ``ExternalTTSService`` instance on every call."""
     return ExternalTTSService()
+# Endpoints with enhanced Swagger docs
+@app.get("/v1/health",
+         summary="Check API Health",
+         description="Returns the health status of the API and the current model in use.",
+         tags=["Utility"],
+         response_model=dict)
+async def health_check():
+    """Return a fixed "healthy" status together with ``settings.llm_model_name``."""
+    return {"status": "healthy", "model": settings.llm_model_name}
+@app.get("/",
+         summary="Redirect to Docs",
+         description="Redirects to the Swagger UI documentation.",
+         tags=["Utility"])
+async def home():
+    """Redirect the root path to ``/docs``."""
+    return RedirectResponse(url="/docs")
+@app.post("/v1/token",
+          response_model=TokenResponse,
+          summary="User Login",
+          description="Authenticate a user with username and password to obtain an access token.",
+          tags=["Authentication"],
+          responses={
+              200: {"description": "Successful login", "model": TokenResponse},
+              401: {"description": "Invalid username or password"}
+          })
 async def token(login_request: LoginRequest):
+    """Delegate to ``login`` from ``utils.auth`` and return its result unchanged."""
     return await login(login_request)
+@app.post("/v1/refresh",
+          response_model=TokenResponse,
+          summary="Refresh Access Token",
+          description="Generate a new access token using an existing valid token.",
+          tags=["Authentication"],
+          responses={
+              200: {"description": "New token issued", "model": TokenResponse},
+              401: {"description": "Invalid or expired token"}
+          })
 async def refresh(token_response: TokenResponse = Depends(refresh_token)):
+    """Return the ``TokenResponse`` produced by the ``refresh_token`` dependency."""
     return token_response
+@app.post("/v1/register",
+          response_model=TokenResponse,
+          summary="Register New User",
+          description="Create a new user account and return an access token.",
+          tags=["Authentication"],
+          responses={
+              200: {"description": "User registered successfully", "model": TokenResponse},
+              400: {"description": "Username already exists"}
+          })
+async def register_user(register_request: RegisterRequest):
+    """Delegate to ``register`` from ``utils.auth`` and return its result unchanged."""
+    return await register(register_request)
+@app.post("/v1/audio/speech",
+          summary="Generate Speech from Text",
+          description="Convert text to speech in the specified format using an external TTS service. Rate limited to 5 requests per minute per user.",
+          tags=["Audio"],
+          responses={
+              200: {"description": "Audio stream", "content": {"audio/mp3": {"example": "Binary audio data"}}},
+              400: {"description": "Invalid input"},
+              429: {"description": "Rate limit exceeded"},
+              504: {"description": "TTS service timeout"}
+          })
 @limiter.limit(settings.speech_rate_limit)
 async def generate_audio(
     request: Request,
     )
 class ChatRequest(BaseModel):
+    prompt: str = Field(..., description="Text prompt for chat (max 1000 characters)")
+    src_lang: str = Field("kan_Knda", description="Source language code (default: Kannada)")
     @field_validator("prompt")
     def prompt_must_be_valid(cls, v):
             raise ValueError("Prompt cannot exceed 1000 characters")
         return v.strip()
+    # Pydantic v2 (this file imports `field_validator`) reads schema extras from
+    # `model_config["json_schema_extra"]`; the v1 `Config.schema_extra` key is
+    # not applied, so the example would not reach the generated OpenAPI schema.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "prompt": "Hello, how are you?",
+                "src_lang": "kan_Knda",
+            }
+        }
+    }
+class ChatResponse(BaseModel):
+    # Response body declared as response_model for /v1/chat.
+    response: str = Field(..., description="Generated chat response")
+    # Pydantic v2 takes schema extras from `json_schema_extra`; v1's
+    # `Config.schema_extra` is not applied.
+    model_config = {"json_schema_extra": {"example": {"response": "Hi there, I'm doing great!"}}}
+@app.post("/v1/chat",
+          response_model=ChatResponse,
+          summary="Chat with AI",
+          description="Generate a chat response from a prompt in the specified language. Rate limited to 100 requests per minute per user.",
+          tags=["Chat"],
+          responses={
+              200: {"description": "Chat response", "model": ChatResponse},
+              400: {"description": "Invalid prompt"},
+              429: {"description": "Rate limit exceeded"},
+              504: {"description": "Chat service timeout"}
+          })
 @limiter.limit(settings.chat_rate_limit)
 async def chat(
     request: Request,
         logger.error(f"Error processing request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
+@app.post("/v1/process_audio/",
+          response_model=AudioProcessingResponse,
+          summary="Process Audio File",
+          description="Process an uploaded audio file in the specified language. Rate limited to 100 requests per minute per user.",
+          tags=["Audio"],
+          responses={
+              200: {"description": "Processed result", "model": AudioProcessingResponse},
+              429: {"description": "Rate limit exceeded"},
+              504: {"description": "Audio processing timeout"}
+          })
 @limiter.limit(settings.chat_rate_limit)
 async def process_audio(
+    request: Request,
+    file: UploadFile = File(..., description="Audio file to process"),
+    language: str = Query(..., enum=["kannada", "hindi", "tamil"], description="Language of the audio"),
     user_id: str = Depends(get_current_user),
 ):
     logger.info("Processing audio processing request", extra={
         "endpoint": "/v1/process_audio",
         logger.error(f"Audio processing request failed: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Audio processing failed: {str(e)}")
+@app.post("/v1/transcribe/",
+          response_model=TranscriptionResponse,
+          summary="Transcribe Audio File",
+          description="Transcribe an uploaded audio file into text in the specified language.",
+          tags=["Audio"],
+          responses={
+              200: {"description": "Transcription result", "model": TranscriptionResponse},
+              504: {"description": "Transcription service timeout"}
+          })
 async def transcribe_audio(
+    file: UploadFile = File(..., description="Audio file to transcribe"),
+    language: str = Query(..., enum=["kannada", "hindi", "tamil"], description="Language of the audio"),
     user_id: str = Depends(get_current_user),
     request: Request = None,
 ):
     except requests.RequestException as e:
         raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
+@app.post("/v1/chat_v2",
+          response_model=TranscriptionResponse,
+          summary="Chat with Image (V2)",
+          description="Generate a response from a text prompt and optional image. Rate limited to 100 requests per minute per user.",
+          tags=["Chat"],
+          responses={
+              200: {"description": "Chat response", "model": TranscriptionResponse},
+              400: {"description": "Invalid prompt"},
+              429: {"description": "Rate limit exceeded"}
+          })
 @limiter.limit(settings.chat_rate_limit)
 async def chat_v2(
     request: Request,
+    prompt: str = Form(..., description="Text prompt for chat"),
+    image: UploadFile = File(default=None, description="Optional image to accompany the prompt"),
     user_id: str = Depends(get_current_user)
 ):
     if not prompt:
         raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 class TranslationRequest(BaseModel):
+    # Request body accepted by the /v1/translate endpoint.
+    sentences: List[str] = Field(..., description="List of sentences to translate")
+    src_lang: str = Field(..., description="Source language code")
+    tgt_lang: str = Field(..., description="Target language code")
+    # Pydantic v2 takes schema extras from `json_schema_extra`; v1's
+    # `Config.schema_extra` is not applied.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "sentences": ["Hello", "How are you?"],
+                "src_lang": "en",
+                "tgt_lang": "kan_Knda",
+            }
+        }
+    }
 class TranslationResponse(BaseModel):
+    # Response body declared as response_model for /v1/translate.
+    translations: List[str] = Field(..., description="Translated sentences")
+    # Pydantic v2 takes schema extras from `json_schema_extra`; v1's
+    # `Config.schema_extra` is not applied.
+    model_config = {"json_schema_extra": {"example": {"translations": ["ನಮಸ್ಕಾರ", "ನೀವು ಹೇಗಿದ್ದೀರಿ?"]}}}
+@app.post("/v1/translate",
+          response_model=TranslationResponse,
+          summary="Translate Text",
+          description="Translate a list of sentences from source to target language.",
+          tags=["Translation"],
+          responses={
+              200: {"description": "Translation result", "model": TranslationResponse},
+              500: {"description": "Translation service error"},
+              504: {"description": "Translation service timeout"}
+          })
 async def translate(
     request: TranslationRequest,
     user_id: str = Depends(get_current_user)

src/server/utils/auth.py CHANGED Viewed

@@ -8,29 +8,25 @@ from config.logging_config import logger
 from sqlalchemy import create_engine, Column, String
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker
 from passlib.context import CryptContext
 # SQLite database setup
 DATABASE_URL = "sqlite:///users.db"
-engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})  # For SQLite threading
 Base = declarative_base()
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-# Password hashing setup
-pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
 class User(Base):
     __tablename__ = "users"
     username = Column(String, primary_key=True, index=True)
-    password = Column(String)  # Now stores hashed passwords
-# Create the database tables
 Base.metadata.create_all(bind=engine)
-# Seed initial data (optional, for testing)
 def seed_initial_data():
     db = SessionLocal()
     if not db.query(User).filter_by(username="testuser").first():
@@ -39,8 +35,7 @@ def seed_initial_data():
         db.commit()
     db.close()
-seed_initial_data()  # Run once at startup
 class Settings(BaseSettings):
     api_key_secret: str = Field(..., env="API_KEY_SECRET")
@@ -77,6 +72,10 @@ class LoginRequest(BaseModel):
     username: str
     password: str
 async def create_access_token(user_id: str) -> str:
     expire = datetime.utcnow() + timedelta(minutes=settings.token_expiration_minutes)
     payload = {"sub": user_id, "exp": expire.timestamp()}
@@ -138,6 +137,24 @@ async def login(login_request: LoginRequest) -> TokenResponse:
     token = await create_access_token(user_id=user.username)
     return TokenResponse(access_token=token, token_type="bearer")
 async def refresh_token(token: str = Depends(oauth2_scheme)) -> TokenResponse:
     user_id = await get_current_user(token)
     new_token = await create_access_token(user_id=user_id)

 from sqlalchemy import create_engine, Column, String
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker
 from passlib.context import CryptContext
 # SQLite database setup
 DATABASE_URL = "sqlite:///users.db"
+engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
 Base = declarative_base()
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 class User(Base):
+    # ORM model mapped to the "users" table; `username` is the indexed primary key.
     __tablename__ = "users"
     username = Column(String, primary_key=True, index=True)
+    password = Column(String)  # Stores hashed passwords
 Base.metadata.create_all(bind=engine)
+# Password hashing
+pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
+# Seed initial data (optional)
 def seed_initial_data():
     db = SessionLocal()
     if not db.query(User).filter_by(username="testuser").first():
         db.commit()
     db.close()
+seed_initial_data()
 class Settings(BaseSettings):
     api_key_secret: str = Field(..., env="API_KEY_SECRET")
     username: str
     password: str
+class RegisterRequest(BaseModel):
+    # Request body for account registration; consumed by `register`.
+    # Neither field declares length or character constraints.
+    username: str
+    password: str
 async def create_access_token(user_id: str) -> str:
     expire = datetime.utcnow() + timedelta(minutes=settings.token_expiration_minutes)
     payload = {"sub": user_id, "exp": expire.timestamp()}
     token = await create_access_token(user_id=user.username)
     return TokenResponse(access_token=token, token_type="bearer")
+async def register(register_request: RegisterRequest) -> TokenResponse:
+    """Create a new user and return a bearer token for it.
+
+    Args:
+        register_request: Desired username and plaintext password.
+
+    Returns:
+        TokenResponse holding a freshly issued access token.
+
+    Raises:
+        HTTPException: 400 if the username is already taken.
+    """
+    # Function-scope import: the file's full top-level import block is not visible here.
+    from sqlalchemy.exc import IntegrityError
+
+    username = register_request.username
+    db = SessionLocal()
+    try:
+        if db.query(User).filter_by(username=username).first():
+            logger.warning(f"Registration failed: Username {username} already exists")
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Username already exists")
+        # Only the hash is persisted; the plaintext password is never stored.
+        db.add(User(username=username, password=pwd_context.hash(register_request.password)))
+        try:
+            db.commit()
+        except IntegrityError:
+            # Another request inserted the same primary key between the check and the commit.
+            db.rollback()
+            logger.warning(f"Registration failed: Username {username} already exists")
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Username already exists") from None
+    finally:
+        # Release the session on every path, including when the query, hash, or commit raises.
+        db.close()
+    token = await create_access_token(user_id=username)
+    logger.info(f"Registered and generated token for user: {username}")
+    return TokenResponse(access_token=token, token_type="bearer")
 async def refresh_token(token: str = Depends(oauth2_scheme)) -> TokenResponse:
     user_id = await get_current_user(token)
     new_token = await create_access_token(user_id=user_id)