Spaces:

AstroMLab
/

AstroSage-8B

Runtime error

Tijmen2 committed on Nov 20, 2024

Commit

034153d

verified ·

1 Parent(s): 7efaceb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,7 +24,7 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",
     use_safetensors=True,
     trust_remote_code=True,
-    load_in_8bit=True,
     torch_dtype=torch.bfloat16
 )
@@ -36,11 +36,13 @@ GREETING_MESSAGES = [
     "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
 ]
 def format_message(role: str, content: str) -> str:
     """Render one chat turn in the Llama-3 chat template.

     The role is placed between the header tokens, followed by a blank
     line, the message content, and the end-of-turn token.
     """
     header = f"<|start_header_id|>{role}<|end_header_id|>"
     return f"{header}\n\n{content}<|eot_id|>"
-def generate_text(prompt: str, history: list, max_new_tokens=512, temperature=0.7, top_p=0.95, top_k=50):
     """
     Generate a response using the transformer model with proper Llama-3 chat formatting.
     """
@@ -81,9 +83,7 @@ def generate_text(prompt: str, history: list, max_new_tokens=512, temperature=0.
         max_new_tokens=max_new_tokens,
         do_sample=True,
         top_p=top_p,
-        top_k=top_k,
         temperature=temperature,
-        num_beams=1,
     )
     # Generate the response in a separate thread for streaming

     device_map="auto",
     use_safetensors=True,
     trust_remote_code=True,
+    load_in_4bit=True,
     torch_dtype=torch.bfloat16
 )
     "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
 ]
 def format_message(role: str, content: str) -> str:
     """Render one chat turn in the Llama-3 chat template.

     The role is placed between the header tokens, followed by a blank
     line, the message content, and the end-of-turn token.
     """
     header = f"<|start_header_id|>{role}<|end_header_id|>"
     return f"{header}\n\n{content}<|eot_id|>"
+def generate_text(prompt: str, history: list, max_new_tokens=512, temperature=0.7, top_p=0.95):
     """
     Generate a response using the transformer model with proper Llama-3 chat formatting.
     """
         max_new_tokens=max_new_tokens,
         do_sample=True,
         top_p=top_p,
         temperature=temperature,
     )
     # Generate the response in a separate thread for streaming