---
# Model metadata block.
# NOTE(review): the keys match the Hugging Face model-card front-matter
# schema; confirm that is the consumer of this file.
base_model: Qwen/Qwen2.5-1.5B-Instruct
tags:
  - text-generation-inference
  - transformers
  - trl
  - grpo
license: apache-2.0
language:
  - en
# The marker below ends the YAML document so that the non-YAML content
# following it is not parsed as part of this mapping.
---
config = {  # settings mapping; NOTE(review): looks like adapter (LoRA-style) fine-tuning options, confirm
    "rank": 8,  # presumably the adapter rank; verify against the consumer
    "alpha": 16,  # presumably the scaling factor; alpha / rank == 2 with these values
    "learning_rate": 2e-5,
    "target_modules": ["mlps"],  # NOTE(review): confirm the consumer accepts "mlps" as a module selector
}