DiDustin committed on
Commit
f5a480d
·
verified ·
1 Parent(s): 15dcf22

Update app.py

Browse files

update device to use only CUDA (GPU)
use an additional GPU decorator for any function that loads or runs models on the GPU

Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -63,11 +63,11 @@ LANGUAGES = {
63
  loaded_models = {}
64
  loaded_tokenizers = {}
65
 
66
-
67
  def load_model_and_tokenizer(model_key):
68
  if model_key not in loaded_models:
69
  model_info = MODELS[model_key]
70
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
71
  model = AutoModelForCausalLM.from_pretrained(
72
  model_info["model_name"],
73
  token=HF_TOKEN,
@@ -84,13 +84,13 @@ def load_model_and_tokenizer(model_key):
84
  tokenizer.pad_token = tokenizer.eos_token
85
  loaded_tokenizers[model_key] = tokenizer
86
 
87
-
88
  def generate_text(model_choice, prompt, max_length, temperature, top_p, do_sample):
89
  load_model_and_tokenizer(model_choice)
90
 
91
  model = loaded_models[model_choice]
92
  tokenizer = loaded_tokenizers[model_choice]
93
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
94
 
95
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)
96
 
@@ -136,7 +136,7 @@ def update_language(selected_language):
136
  )
137
 
138
 
139
- @spaces.GPU(duration=180)
140
  def wrapped_generate_text(model_choice, prompt, max_length, temperature, top_p, do_sample):
141
  return generate_text(model_choice, prompt, max_length, temperature, top_p, do_sample)
142
 
 
63
  loaded_models = {}
64
  loaded_tokenizers = {}
65
 
66
+ @spaces.GPU(duration=240)
67
  def load_model_and_tokenizer(model_key):
68
  if model_key not in loaded_models:
69
  model_info = MODELS[model_key]
70
+ device = "cuda"
71
  model = AutoModelForCausalLM.from_pretrained(
72
  model_info["model_name"],
73
  token=HF_TOKEN,
 
84
  tokenizer.pad_token = tokenizer.eos_token
85
  loaded_tokenizers[model_key] = tokenizer
86
 
87
+ @spaces.GPU(duration=240)
88
  def generate_text(model_choice, prompt, max_length, temperature, top_p, do_sample):
89
  load_model_and_tokenizer(model_choice)
90
 
91
  model = loaded_models[model_choice]
92
  tokenizer = loaded_tokenizers[model_choice]
93
+ device = "cuda"
94
 
95
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)
96
 
 
136
  )
137
 
138
 
139
+ @spaces.GPU(duration=240)
140
  def wrapped_generate_text(model_choice, prompt, max_length, temperature, top_p, do_sample):
141
  return generate_text(model_choice, prompt, max_length, temperature, top_p, do_sample)
142