m1k3wn committed
Commit 4347c84 · verified · 1 Parent(s): 3e742c6

Update app.py

adds type safety and improved debugging

Files changed (1)
  1. app.py +35 -33
app.py CHANGED
@@ -1,5 +1,5 @@
 from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
+from pydantic import BaseModel, validator
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import logging
 from typing import Optional, Dict, Any
@@ -30,9 +30,23 @@ loaded_tokenizers = {}
 # Pydantic models for request/response validation
 class PredictionRequest(BaseModel):
     inputs: str
-    model: str = "nidra-v1"  # Default to v1
+    model: str = "nidra-v1"
     parameters: Optional[Dict[str, Any]] = {}
 
+    @validator('inputs')
+    def validate_inputs(cls, v):
+        if not isinstance(v, str):
+            raise ValueError('inputs must be a string')
+        if not v.strip():
+            raise ValueError('inputs cannot be empty')
+        return v.strip()
+
+    @validator('model')
+    def validate_model(cls, v):
+        if v not in MODELS:
+            raise ValueError(f'model must be one of: {", ".join(MODELS.keys())}')
+        return v
+
 class PredictionResponse(BaseModel):
     generated_text: str
@@ -43,21 +57,20 @@ def load_model(model_name: str):
     try:
         model_path = MODELS[model_name]
 
-        # Load tokenizer with minimal settings
+        logger.info("Loading tokenizer...")
         tokenizer = AutoTokenizer.from_pretrained(
             model_path,
             token=HF_TOKEN,
-            use_fast=False  # Use slower but more stable tokenizer
+            use_fast=False
         )
 
-        # Load model with minimal settings
+        logger.info("Loading model...")
         model = AutoModelForSeq2SeqLM.from_pretrained(
             model_path,
             token=HF_TOKEN,
-            torch_dtype=torch.float32,  # Use standard precision
+            torch_dtype=torch.float32,
         )
 
-        # Move model to CPU explicitly
         model = model.cpu()
 
         loaded_models[model_name] = model
@@ -68,49 +81,34 @@ def load_model(model_name: str):
         raise
     return loaded_tokenizers[model_name], loaded_models[model_name]
 
-@app.get("/")
-def read_root():
-    """Root endpoint with API info"""
-    return {
-        "api_name": "Dream Interpretation API",
-        "models_available": list(MODELS.keys()),
-        "endpoints": {
-            "/predict": "POST - Make predictions",
-            "/health": "GET - Health check"
-        }
-    }
-
-@app.get("/health")
-def health_check():
-    """Basic health check endpoint"""
-    return {"status": "healthy"}
-
 @app.post("/predict", response_model=PredictionResponse)
 async def predict(request: PredictionRequest):
     """Make a prediction using the specified model"""
     try:
-        if request.model not in MODELS:
-            raise HTTPException(
-                status_code=400,
-                detail=f"Invalid model choice. Available models: {list(MODELS.keys())}"
-            )
-
         # Load model on demand
         tokenizer, model = load_model(request.model)
 
+        # Log the input for debugging
+        logger.info(f"Processing input: {request.inputs}")
+
         # Prepend the shared prefix
         full_input = "Interpret this dream: " + request.inputs
+        logger.info(f"Full input: {full_input}")
 
-        # Tokenize and generate with explicit error handling
         try:
-            input_ids = tokenizer(
+            # Tokenize
+            tokenizer_output = tokenizer(
                 full_input,
                 return_tensors="pt",
                 padding=True,
                 truncation=True,
                 max_length=512
-            ).input_ids
+            )
+            logger.info("Tokenization successful")
 
+            input_ids = tokenizer_output.input_ids
+
+            # Generate
             outputs = model.generate(
                 input_ids,
                 max_length=200,
@@ -118,8 +116,12 @@ async def predict(request: PredictionRequest):
                 no_repeat_ngram_size=2,
                 **request.parameters
             )
+            logger.info("Generation successful")
 
+            # Decode
             decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            logger.info(f"Decoded output: {decoded}")
+
         except Exception as e:
             logger.error(f"Error in model prediction pipeline: {str(e)}")
             raise HTTPException(status_code=500, detail=f"Model prediction failed: {str(e)}")
 
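A quick client-side check of the new validation, sketched below with the requests library. The base URL and port are assumptions (Spaces commonly expose 7860) and the dream text is made up; with the validators in place, a blank inputs field or an unknown model name should now be rejected by FastAPI with a 422 validation error before predict() runs, rather than reaching the old in-endpoint 400 check.

# Hypothetical client-side check of the new request validation.
# Assumptions: the app is reachable locally on port 7860 and the
# nidra-v1 weights load successfully; adjust BASE as needed.
import requests

BASE = "http://localhost:7860"

# Well-formed request: "model" falls back to the "nidra-v1" default if omitted
ok = requests.post(
    f"{BASE}/predict",
    json={"inputs": "I was flying over a city made of glass.", "model": "nidra-v1"},
)
print(ok.status_code, ok.json())  # expect 200 and {"generated_text": "..."}

# Whitespace-only input now fails validate_inputs, so FastAPI rejects it
bad_input = requests.post(f"{BASE}/predict", json={"inputs": "   "})
print(bad_input.status_code)  # expect 422 (validation error), not a model error

# Unknown model names fail validate_model the same way
bad_model = requests.post(
    f"{BASE}/predict",
    json={"inputs": "a recurring dream about trains", "model": "not-a-model"},
)
print(bad_model.status_code)  # expect 422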
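
Note that @validator is the Pydantic v1 decorator; on Pydantic v2 it still imports but is deprecated in favour of field_validator. If the Space ever moves to v2, a minimal sketch of the equivalent spelling (MODELS here is a placeholder for the real mapping defined earlier in app.py):

# Sketch only: Pydantic v2 spelling of the same validators.
# MODELS is a stand-in for the real mapping defined in app.py.
from typing import Any, Dict, Optional
from pydantic import BaseModel, field_validator

MODELS = {"nidra-v1": "m1k3wn/nidra-v1"}  # placeholder path for illustration

class PredictionRequest(BaseModel):
    inputs: str
    model: str = "nidra-v1"
    parameters: Optional[Dict[str, Any]] = {}

    @field_validator("inputs")
    @classmethod
    def validate_inputs(cls, v: str) -> str:
        # Pydantic has already enforced that v is a str by this point
        if not v.strip():
            raise ValueError("inputs cannot be empty")
        return v.strip()

    @field_validator("model")
    @classmethod
    def validate_model(cls, v: str) -> str:
        if v not in MODELS:
            raise ValueError(f"model must be one of: {', '.join(MODELS.keys())}")
        return v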
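
For debugging the generation path outside the Space, the tokenize, generate and decode steps the endpoint performs can be reproduced standalone. The sketch below substitutes the public t5-small checkpoint for the nidra models (which app.py loads with an HF_TOKEN) and passes the attention mask explicitly, which transformers may otherwise warn about when padding is enabled:

# Standalone sketch of the endpoint's tokenize -> generate -> decode pipeline.
# Assumption: t5-small stands in for the private nidra checkpoints;
# requires transformers, torch and sentencepiece.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("t5-small", use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small").cpu()

full_input = "Interpret this dream: I was flying over a city made of glass."
encoded = tokenizer(
    full_input,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=512,
)

outputs = model.generate(
    encoded.input_ids,
    attention_mask=encoded.attention_mask,  # explicit mask avoids a padding warning
    max_length=200,
    no_repeat_ngram_size=2,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))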