Spaces:

5to9
/

bot-royale

Runtime error

App Files Files Community

5to9 committed on Sep 27, 2024

Commit

65118fd

1 Parent(s): 0e52052

0.24 Load in 8bit

Browse files

Files changed (1) hide show

app.py +3 -22

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ import os
 import traceback
 from threading import Thread
-import subprocess
 # Status: Breaks during generation
@@ -56,22 +55,6 @@ def apply_chat_template(messages, add_generation_prompt=False):
     return pharia_template
-def check_gpu_status():
-    try:
-        # Run the nvidia-smi command and capture the output
-        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-        # Log the output of nvidia-smi
-        logging.info("NVIDIA-SMI Output:\n%s", result.stdout)
-        # If there's any error in the subprocess, log it as well
-        if result.stderr:
-            logging.error("NVIDIA-SMI Error Output:\n%s", result.stderr)
-    except Exception as e:
-        logging.error(f"Error running nvidia-smi: {e}")
 def load_model_a(model_id):
     global tokenizer_a, model_a, model_id_a
     try:
@@ -80,13 +63,12 @@ def load_model_a(model_id):
         model_a = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype=torch.float16,
-            device_map="auto", # {"": "cuda:0"} returns "must be Stateless GPU environment"
             trust_remote_code=True,
         )
     except Exception as e:
         logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
-    finally:
-        check_gpu_status()
     return gr.update(label=model_id)
@@ -99,12 +81,11 @@ def load_model_b(model_id):
             model_id,
             torch_dtype=torch.float16,
             device_map="auto",
             trust_remote_code=True,
         )
     except Exception as e:
         logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
-    finally:
-        check_gpu_status()
     return gr.update(label=model_id)
 @spaces.GPU()

 import traceback
 from threading import Thread
 # Status: Breaks during generation
     return pharia_template
 def load_model_a(model_id):
     global tokenizer_a, model_a, model_id_a
     try:
         model_a = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype=torch.float16,
+            device_map="auto",
+            load_in_8bit=True,
             trust_remote_code=True,
         )
     except Exception as e:
         logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
     return gr.update(label=model_id)
             model_id,
             torch_dtype=torch.float16,
             device_map="auto",
+            load_in_8bit=True,
             trust_remote_code=True,
         )
     except Exception as e:
         logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
     return gr.update(label=model_id)
 @spaces.GPU()