MiniCPM-1B

Runtime error

vilarin committed on Jun 28, 2024

Commit

652ef04

verified ·

1 Parent(s): d83af28

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -36,15 +36,16 @@ h3 {
     text-align: center;
 }
 """
-model = AutoModelForCausalLM.from_pretrained(
-    MODELS,
-    device_map="auto",
-    quantization_config=BitsAndBytesConfig(load_in_4bit=True)
         )
-tokenizer = GemmaTokenizerFast.from_pretrained(MODELS)
-model.config.sliding_window = 4096
-model.eval()
 @spaces.GPU(duration=90)
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
@@ -75,7 +76,6 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
         do_sample=True,
         temperature=temperature,
         num_beams=1,
-        repetition_penalty=repetition_penalty,
     )
     thread = Thread(target=model.generate, kwargs=generate_kwargs)

     text-align: center;
 }
 """
+if torch.cuda.is_available():
+    model = AutoModelForCausalLM.from_pretrained(
+        MODELS,
+        device_map="auto",
+        quantization_config=BitsAndBytesConfig(load_in_4bit=True)
         )
+    tokenizer = GemmaTokenizerFast.from_pretrained(MODELS)
+    model.config.sliding_window = 4096
+    model.eval()
 @spaces.GPU(duration=90)
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
         do_sample=True,
         temperature=temperature,
         num_beams=1,
     )
     thread = Thread(target=model.generate, kwargs=generate_kwargs)