Spaces:

m1k3wn
/

nidra

Sleeping

App Files Files Community

m1k3wn committed on Jan 17

Commit

19ec348

verified ·

1 Parent(s): 77ad07b

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -32

app.py CHANGED Viewed

@@ -1,9 +1,8 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import T5Tokenizer, T5ForConditionalGeneration
 import logging
 import os
-import json
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
@@ -16,17 +15,6 @@ MODELS = {
     "nidra-v2": "m1k3wn/nidra-v2"
 }
-# Define the tokenizer configuration explicitly
-TOKENIZER_CONFIG = {
-    "model_max_length": 512,
-    "clean_up_tokenization_spaces": False,
-    "tokenizer_class": "T5Tokenizer",
-    "pad_token": "<pad>",
-    "eos_token": "</s>",
-    "unk_token": "<unk>",
-    "extra_ids": 100
-}
 class PredictionRequest(BaseModel):
     inputs: str
     model: str = "nidra-v1"
@@ -40,56 +28,43 @@ async def predict(request: PredictionRequest):
         logger.info(f"Loading model: {request.model}")
         model_path = MODELS[request.model]
-        # Initialize tokenizer with explicit config
-        tokenizer = T5Tokenizer.from_pretrained(
             model_path,
             token=HF_TOKEN,
-            model_max_length=TOKENIZER_CONFIG["model_max_length"],
-            clean_up_tokenization_spaces=TOKENIZER_CONFIG["clean_up_tokenization_spaces"],
-            pad_token=TOKENIZER_CONFIG["pad_token"],
-            eos_token=TOKENIZER_CONFIG["eos_token"],
-            unk_token=TOKENIZER_CONFIG["unk_token"],
-            extra_ids=TOKENIZER_CONFIG["extra_ids"],
-            use_fast=True  # Try forcing the fast tokenizer
         )
-        model = T5ForConditionalGeneration.from_pretrained(
             model_path,
             token=HF_TOKEN,
-            torch_dtype="auto"
         )
         full_input = "Interpret this dream: " + request.inputs
         logger.info(f"Processing: {full_input}")
-        # Add explicit encoding parameters
         inputs = tokenizer(
             full_input,
             return_tensors="pt",
             truncation=True,
             max_length=512,
-            padding=True,
-            add_special_tokens=True
         )
         outputs = model.generate(
             **inputs,
             max_length=200,
             num_beams=4,
-            no_repeat_ngram_size=2,
-            length_penalty=1.0
         )
         result = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        logger.info(f"Generated result: {result}")
         return PredictionResponse(generated_text=result)
     except Exception as e:
         logger.error(f"Error: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
-# Add health check endpoint
 @app.get("/health")
 async def health():
     return {"status": "healthy"}

 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import logging
 import os
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
     "nidra-v2": "m1k3wn/nidra-v2"
 }
 class PredictionRequest(BaseModel):
     inputs: str
     model: str = "nidra-v1"
         logger.info(f"Loading model: {request.model}")
         model_path = MODELS[request.model]
+        # Load tokenizer and model
+        tokenizer = AutoTokenizer.from_pretrained(
             model_path,
             token=HF_TOKEN,
         )
+        model = AutoModelForSeq2SeqLM.from_pretrained(
             model_path,
             token=HF_TOKEN,
+            device_map="auto"
         )
         full_input = "Interpret this dream: " + request.inputs
         logger.info(f"Processing: {full_input}")
         inputs = tokenizer(
             full_input,
             return_tensors="pt",
             truncation=True,
             max_length=512,
+            padding=True
         )
         outputs = model.generate(
             **inputs,
             max_length=200,
             num_beams=4,
+            no_repeat_ngram_size=2
         )
         result = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return PredictionResponse(generated_text=result)
     except Exception as e:
         logger.error(f"Error: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 @app.get("/health")
 async def health():
     return {"status": "healthy"}