wuhp committed
Commit eabbd4b · verified · 1 Parent(s): 5a9af80

Update app.py

Files changed (1)
  1. app.py +31 -4
app.py CHANGED
@@ -2,34 +2,53 @@ import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM

+# ----------------------------------------------------------------
+# 1) Points to your Hugging Face repo and subfolder
+#    (where config.json, tokenizer.json, model safetensors, etc. reside).
+# ----------------------------------------------------------------
 MODEL_REPO = "wuhp/myr1"
 SUBFOLDER = "myr1"

+# ----------------------------------------------------------------
+# 2) Load the tokenizer
+#    trust_remote_code=True allows custom code (e.g., DeepSeek config/classes).
+# ----------------------------------------------------------------
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_REPO,
     subfolder=SUBFOLDER,
     trust_remote_code=True
 )

-# If your GPU has <24GB VRAM, consider 8-bit or CPU offloading
+# ----------------------------------------------------------------
+# 3) Load the model
+#    - device_map="auto" tries to place layers on GPU and offload remainder to CPU if needed
+#    - torch_dtype can be float16, float32, bfloat16, etc., depending on GPU support
+# ----------------------------------------------------------------
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_REPO,
     subfolder=SUBFOLDER,
     trust_remote_code=True,
-    device_map="auto",          # tries to place layers on GPU, then CPU if needed
-    torch_dtype=torch.float16,  # or bfloat16 or float32
+    device_map="auto",
+    torch_dtype=torch.float16,
     low_cpu_mem_usage=True
 )

+# Put model in evaluation mode
 model.eval()

+# ----------------------------------------------------------------
+# 4) Define the generation function
+# ----------------------------------------------------------------
 def generate_text(prompt, max_length=64, temperature=0.7, top_p=0.9):
     print("=== Starting generation ===")
+    # Move input tokens to the same device as the model
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
     try:
+        # Generate tokens
         output_ids = model.generate(
             **inputs,
-            max_new_tokens=max_length,  # alternative to max_length
+            max_new_tokens=max_length,  # This controls how many tokens beyond the prompt are generated
             temperature=temperature,
             top_p=top_p,
             do_sample=True,
@@ -39,8 +58,13 @@ def generate_text(prompt, max_length=64, temperature=0.7, top_p=0.9):
     except Exception as e:
         print(f"Error during generation: {e}")
         return str(e)
+
+    # Decode back to text (skipping special tokens)
     return tokenizer.decode(output_ids[0], skip_special_tokens=True)

+# ----------------------------------------------------------------
+# 5) Build a Gradio UI
+# ----------------------------------------------------------------
 demo = gr.Interface(
     fn=generate_text,
     inputs=[
@@ -58,5 +82,8 @@ demo = gr.Interface(
     description="Generates text using the large DeepSeek model."
 )

+# ----------------------------------------------------------------
+# 6) Run the Gradio app
+# ----------------------------------------------------------------
 if __name__ == "__main__":
     demo.launch()
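
The comment removed in this commit ("If your GPU has <24GB VRAM, consider 8-bit or CPU offloading") points at an alternative load path. Below is a minimal sketch of that idea; it assumes the optional bitsandbytes package is installed, and the quantization settings are illustrative rather than part of this commit:

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

MODEL_REPO = "wuhp/myr1"
SUBFOLDER = "myr1"

# Illustrative: load weights in 8-bit to roughly halve VRAM use vs. float16
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True,
    device_map="auto",               # place what fits on the GPU, offload the rest to CPU
    quantization_config=bnb_config,  # requires bitsandbytes; assumption, not part of this commit
    low_cpu_mem_usage=True
)

With device_map="auto" alone, as committed here, any layers that do not fit in VRAM are offloaded to CPU RAM by accelerate; generation still works but is noticeably slower than an all-GPU or 8-bit setup.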