Fix: GPU usage issue
app.py CHANGED

@@ -53,7 +53,6 @@ def combine_model_parts(model_dir="split_models", output_file="checkpoints/last.
     print(f"Model combined successfully: {output_file}")
     return output_file
 
-@spaces.GPU(enable_queue=True)
 def load_model():
     """
     Load the SmollmV2 model and tokenizer.
@@ -89,6 +88,8 @@ def load_model():
     except Exception as e:
         raise RuntimeError(f"Error loading model: {str(e)}")
 
+# Load the model globally
+model, tokenizer, device = load_model()
 
 @spaces.GPU(enable_queue=True)
 def generate_text(prompt, num_tokens, temperature=0.8, top_p=0.9):
@@ -142,13 +143,6 @@ def generate_text(prompt, num_tokens, temperature=0.8, top_p=0.9):
     except Exception as e:
         return f"Error during text generation: {str(e)}"
 
-# Load the model globally
-try:
-    model, tokenizer, device = load_model()
-except Exception as e:
-    print(f"Error initializing model: {str(e)}")
-    raise
-
 # Create the Gradio interface
 demo = gr.Interface(
     fn=generate_text,