from transformers import AutoTokenizer, AutoModelForCausalLM

# Load your fine-tuned model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("./lockin_model")
model = AutoModelForCausalLM.from_pretrained("./lockin_model")

# Many causal LM tokenizers (e.g. GPT-2 style) ship without a pad token;
# fall back to the EOS token so padding=True below does not raise an error
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Function to generate yes/no questions
def generate_question(input_text, max_retries=20):
    for _ in range(max_retries):
        # Tokenize the prompt with padding and an attention mask
        inputs = tokenizer(
            input_text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            return_attention_mask=True,
        )

        # Sample a completion; the high temperature favors varied questions
        output = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=100,
            do_sample=True,
            temperature=1.9,
            top_p=0.8,
            top_k=50,
            pad_token_id=tokenizer.eos_token_id,
        )

        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

        # Strip the echoed prompt from the generated output
        if generated_text.startswith(input_text):
            generated_text = generated_text[len(input_text):].strip()

        # Accept the completion only if it is non-empty and mentions $LOCKIN
        if generated_text and "$LOCKIN" in generated_text:
            return generated_text

    # If all retries fail, return a default question
    return "Does $LOCKIN look great?"

# Example usage
prompt = "I need a yes/no question about $LOCKIN."
question = generate_question(prompt)
print("Generated Question:", question)