# Qwen2.5-DeepHyper / mergekit_config.yml
base_model: Qwen/Qwen2.5-14B
models:
  # Each adapter was extracted (rank=128) from its respective finetuned model.
  # The full Instruct model is merged in at a lower weight than the adapter-extracted models.
  - model: CultriX/Qwen2.5-14B-Hyperionv3_r128
    parameters:
      weight: 0.9   # Slightly reduced weight relative to the other adapter models
      density: 0.9
  - model: CultriX/Qwen2.5-14B_Virtuoso-small-v2-LoRA_r128
    parameters:
      weight: 1.0
      density: 1.0
  - model: Qwen/Qwen2.5-14B-Instruct
    parameters:
      weight: 0.75
      density: 0.75
  # Local snapshot of CultriX/Qwen2.5-14B-DeepSeek_r128
  - model: /root/.cache/huggingface/hub/models--CultriX--Qwen2.5-14B-DeepSeek_r128/snapshots/1bca847f92fced165076d9ac921a1e3ef01fcd7f/
    parameters:
      weight: 1.00
      density: 1.00
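# Note on the per-model parameters above (assumed dare_ties semantics, added for clarity):
# "density" is the fraction of each model's delta parameters (model minus base) kept
# after DARE's random drop step, and "weight" scales the surviving deltas before the
# TIES sign election. For example, density: 0.9 drops roughly 10% of the Hyperionv3
# deltas and rescales the remainder to preserve their expected magnitude.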
# Merging method and overall parameters
merge_method: dare_ties  # DARE-style random dropping/rescaling of deltas combined with TIES sign election.
parameters:
  weight: 1.0       # Overall scaling factor.
  density: 1.0      # Overall density (typically left at 1.0).
  normalize: true   # Normalize the model weights so they sum to 1 before merging.
  int8_mask: true   # Store intermediate masks in int8 to reduce memory usage.
# Take the tokenizer from the Virtuoso-small-v2 LoRA model to ensure compatibility.
tokenizer_source: CultriX/Qwen2.5-14B_Virtuoso-small-v2-LoRA_r128
# Data type for merged weights.
dtype: bfloat16
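# --- Usage sketch (not part of the mergekit config schema) ---
# A minimal example of how this file would typically be applied, assuming mergekit
# is installed (pip install mergekit) and this config is saved as mergekit_config.yml;
# the output directory name is illustrative:
#   mergekit-yaml mergekit_config.yml ./Qwen2.5-DeepHyper --cuda
# The --cuda flag runs the merge on GPU and can be dropped for a CPU-only merge.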