pdarleyjr
/

iplc-eval

Model card Files Files and versions Community

pdarleyjr committed on Feb 6

Commit

6c293ab

verified ·

1 Parent(s): c1e71cb

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +15 -12

app.py CHANGED Viewed

@@ -24,7 +24,7 @@ logger.add(
 # Initialize FastAPI app with metadata
 app = FastAPI(
     title="Clinical Report Generator API",
-    description="Production API for generating clinical report summaries using Flan-T5",
     version="1.0.0",
     docs_url="/documentation",  # Swagger UI
     redoc_url="/redoc"  # ReDoc
@@ -40,6 +40,9 @@ app.add_middleware(
     max_age=3600,  # Cache preflight requests
 )
 class ModelManager:
     def __init__(self):
         self.model = None
@@ -64,30 +67,30 @@ class ModelManager:
             if torch.cuda.is_available():
                 logger.info(f"CUDA memory: {torch.cuda.memory_allocated() / (1024*1024*1024):.2f}GB allocated")
-            # Load tokenizer for Flan-T5-base
-            logger.info("Initializing Flan-T5-base tokenizer...")
             self.tokenizer = T5Tokenizer.from_pretrained(
-                "pdarleyjr/iplc-t5-clinical",
-                use_fast=True,  # Use fast tokenizer
                 model_max_length=512
             )
-            logger.success("Flan-T5-base tokenizer loaded successfully")
             # Load model configuration
             logger.info("Fetching model configuration...")
             config = AutoConfig.from_pretrained(
-                "pdarleyjr/iplc-t5-clinical",
                 trust_remote_code=False
             )
             logger.success("Model configuration loaded successfully")
-            # Load the Flan-T5-base model
-            logger.info("Loading Flan-T5-base model (this may take a few minutes)...")
             device = "cuda" if torch.cuda.is_available() else "cpu"
             logger.info(f"Using device: {device}")
             self.model = T5ForConditionalGeneration.from_pretrained(
-                "pdarleyjr/iplc-t5-clinical",
                 config=config,
                 torch_dtype=torch.float16 if device == "cuda" else torch.float32,
                 low_cpu_mem_usage=True
@@ -181,8 +184,8 @@ async def predict(request: PredictRequest) -> JSONResponse:
             with torch.no_grad(), model_manager.accelerator.autocast():
                 outputs = model_manager.model.generate(
                     input_ids,
-                    max_length=512,  # Increased from 256 to allow for longer summaries
-                    num_beams=5,     # Increased from 4 for more robust beam search
                     no_repeat_ngram_size=3,
                     length_penalty=2.0,
                     early_stopping=True,

 # Initialize FastAPI app with metadata
 app = FastAPI(
     title="Clinical Report Generator API",
+    description="Production API for generating clinical report summaries using T5",
     version="1.0.0",
     docs_url="/documentation",  # Swagger UI
     redoc_url="/redoc"  # ReDoc
     max_age=3600,  # Cache preflight requests
 )
+# Model configuration
+MODEL_ID = "pdarleyjr/iplc-t5-clinical"
 class ModelManager:
     def __init__(self):
         self.model = None
             if torch.cuda.is_available():
                 logger.info(f"CUDA memory: {torch.cuda.memory_allocated() / (1024*1024*1024):.2f}GB allocated")
+            # Load tokenizer
+            logger.info("Initializing tokenizer...")
             self.tokenizer = T5Tokenizer.from_pretrained(
+                MODEL_ID,
+                use_fast=True,
                 model_max_length=512
             )
+            logger.success("Tokenizer loaded successfully")
             # Load model configuration
             logger.info("Fetching model configuration...")
             config = AutoConfig.from_pretrained(
+                MODEL_ID,
                 trust_remote_code=False
             )
             logger.success("Model configuration loaded successfully")
+            # Load the model
+            logger.info("Loading model (this may take a few minutes)...")
             device = "cuda" if torch.cuda.is_available() else "cpu"
             logger.info(f"Using device: {device}")
             self.model = T5ForConditionalGeneration.from_pretrained(
+                MODEL_ID,
                 config=config,
                 torch_dtype=torch.float16 if device == "cuda" else torch.float32,
                 low_cpu_mem_usage=True
             with torch.no_grad(), model_manager.accelerator.autocast():
                 outputs = model_manager.model.generate(
                     input_ids,
+                    max_length=512,  # Increased for longer summaries
+                    num_beams=5,     # Increased for better coherence
                     no_repeat_ngram_size=3,
                     length_penalty=2.0,
                     early_stopping=True,