m1k3wn committed · verified
Commit e3e12f1
1 Parent(s): 5240386

Update app.py


Reconfigure automatic optimisations

Files changed (1)
  1. app.py +37 -7
app.py CHANGED
@@ -4,6 +4,7 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import logging
 from typing import Optional, Dict, Any
 import os
+import torch
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -41,13 +42,24 @@ def load_model(model_name: str):
     logger.info(f"Loading {model_name}...")
     try:
         model_path = MODELS[model_name]
-        tokenizer = AutoTokenizer.from_pretrained(model_path, token=HF_TOKEN)
+
+        # Load tokenizer with minimal settings
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_path,
+            token=HF_TOKEN,
+            use_fast=False  # Use slower but more stable tokenizer
+        )
+
+        # Load model with minimal settings
         model = AutoModelForSeq2SeqLM.from_pretrained(
             model_path,
             token=HF_TOKEN,
-            device_map="auto",
-            torch_dtype="auto"
+            torch_dtype=torch.float32,  # Use standard precision
         )
+
+        # Move model to CPU explicitly
+        model = model.cpu()
+
         loaded_models[model_name] = model
         loaded_tokenizers[model_name] = tokenizer
         logger.info(f"Successfully loaded {model_name}")
@@ -89,10 +101,28 @@ async def predict(request: PredictionRequest):
     # Prepend the shared prefix
     full_input = "Interpret this dream: " + request.inputs
 
-    # Tokenize and generate
-    input_ids = tokenizer(full_input, return_tensors="pt").input_ids
-    outputs = model.generate(input_ids, **request.parameters)
-    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Tokenize and generate with explicit error handling
+    try:
+        input_ids = tokenizer(
+            full_input,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=512
+        ).input_ids
+
+        outputs = model.generate(
+            input_ids,
+            max_length=200,
+            num_return_sequences=1,
+            no_repeat_ngram_size=2,
+            **request.parameters
+        )
+
+        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    except Exception as e:
+        logger.error(f"Error in model prediction pipeline: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Model prediction failed: {str(e)}")
 
     return PredictionResponse(generated_text=decoded)
 
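
For reference, the reconfigured loader replaces the "auto" device and dtype selection with explicit, conservative settings. The following is a minimal standalone sketch of those same flags, using a public T5-style checkpoint purely as a placeholder (it is not the Space's actual model); it can be used to confirm that a model loaded this way ends up on CPU in float32 with the slow tokenizer selected:

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_path = "google/flan-t5-small"  # placeholder checkpoint, not the Space's model

# Same flags as the updated load_model(): slow tokenizer, full precision, explicit CPU
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path, torch_dtype=torch.float32).cpu()

print(tokenizer.is_fast)                # False
print(next(model.parameters()).dtype)   # torch.float32
print(next(model.parameters()).device)  # cpu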
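On the calling side, the request and response shapes used in this handler (an "inputs" string plus a "parameters" dict forwarded to model.generate(), returning "generated_text") suggest a client roughly like the sketch below. The Space URL and the /predict route are assumptions, since the route decorator is not shown in this hunk:

import requests

payload = {
    "inputs": "I was flying over a city made of glass.",
    # Forwarded as **request.parameters into model.generate()
    "parameters": {"do_sample": True, "temperature": 0.9},
}

# URL and route are placeholders for this sketch
resp = requests.post("https://<your-space-url>/predict", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["generated_text"])

Note that generate() is now called with explicit max_length, num_return_sequences and no_repeat_ngram_size before **request.parameters is unpacked, so callers should avoid repeating those keys: Python raises a TypeError for duplicate keyword arguments.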