arya-ai-model committed · Commit e2116c0 · 1 Parent(s): ad9f174

updated model.py

Files changed (1)
  model.py  +18 -8
model.py CHANGED
@@ -5,7 +5,6 @@ import torch
 MODEL_NAME = "bigcode/starcoderbase-1b"
 HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
-# Force CPU mode
 device = "cpu"
 
 # Load tokenizer and model
@@ -18,23 +17,34 @@ if tokenizer.pad_token is None:
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     token=HF_TOKEN,
-    torch_dtype=torch.float32,  # Use float32 for CPU
+    torch_dtype=torch.float32,  # Ensure compatibility with CPU
     trust_remote_code=True
-).to(device)  # Move model explicitly to CPU
+).to(device)
 
 def generate_code(prompt: str, max_tokens: int = 256):
+    formatted_prompt = f"# Python\n{prompt}\n\n"  # Ensure the model understands it's code
+
     inputs = tokenizer(
-        prompt,
+        formatted_prompt,
         return_tensors="pt",
         padding=True,
-        truncation=True,  # Allow truncation
-        max_length=1024  # Set a maximum length explicitly
+        truncation=True,
+        max_length=1024  # Explicit max length to prevent issues
     ).to(device)
 
     output = model.generate(
         **inputs,
         max_new_tokens=max_tokens,
-        pad_token_id=tokenizer.pad_token_id
+        pad_token_id=tokenizer.pad_token_id,
+        do_sample=True,   # Enable randomness for better outputs
+        top_p=0.95,       # Nucleus sampling to improve generation
+        temperature=0.7   # Control creativity
     )
 
-    return tokenizer.decode(output[0], skip_special_tokens=True)
+    generated_code = tokenizer.decode(output[0], skip_special_tokens=True)
+
+    # Clean the output: remove the repeated prompt at the start
+    if generated_code.startswith(formatted_prompt):
+        generated_code = generated_code[len(formatted_prompt):]
+
+    return generated_code.strip()
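
For context, a minimal usage sketch of the updated generate_code follows; it is not part of the commit. It assumes model.py exposes generate_code exactly as in the diff above and that HUGGINGFACE_TOKEN is set before the module is imported (the module reads it at import time); the placeholder token value and the example prompt are purely illustrative.

# Hypothetical usage sketch (not part of this commit).
# Assumes HUGGINGFACE_TOKEN is available before model.py is imported,
# since the module reads it at import time via os.getenv.
import os

os.environ.setdefault("HUGGINGFACE_TOKEN", "<your-hf-token>")  # placeholder value

from model import generate_code

# Request a short completion; a smaller max_tokens keeps CPU latency manageable.
completion = generate_code("def fibonacci(n):", max_tokens=64)
print(completion)

Because the commit enables sampling (do_sample=True, top_p=0.95, temperature=0.7), repeated calls with the same prompt will generally return different completions.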