|
--- |
|
datasets: |
|
- NewEden/Orion-Asstr-Stories-16K |
|
- Mielikki/Erebus-87k |
|
base_model: |
|
- Unsloth/phi-4 |
|
tags: |
|
- phi |
|
- roleplay |
|
- finetune |
|
- storywriting |
|
--- |
|
<!DOCTYPE html> |
|
<html lang="en"> |
|
<style> |
|
html, body { |
|
background: black; |
|
color: #c9d1d9 !important; |
|
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; |
|
margin: 0; |
|
padding: 0; |
|
min-height: 100vh; |
|
} |
|
.markdown-body { |
|
color: white; |
|
margin: 40px auto; |
|
padding: 40px; |
|
border-radius: 12px; |
|
position: relative; |
|
overflow: hidden; |
|
} |
|
|
|
.markdown-body::after { |
|
content: ''; |
|
position: absolute; |
|
top: 0; |
|
left: 0; |
|
width: 100%; |
|
height: 100%; |
|
background: #0c0f18; /* background color */ |
|
pointer-events: none; |
|
z-index: -999; |
|
} |
|
|
|
h1, h2, h3 { |
|
background: linear-gradient(45deg, #6e00ff, #00ffff); |
|
-webkit-background-clip: text; |
|
-webkit-text-fill-color: transparent; |
|
border-bottom: 1px solid #333; |
|
padding-bottom: 0.3em; |
|
} |
|
|
|
div[style*="border:2px solid #333"], |
|
div[style*="border: 2px solid #333"], |
|
div[style*="border:1px solid #333"], |
|
div[style*="border: 1px solid #333"] { |
|
background: rgba(22, 27, 34, 0.8) !important; |
|
border: 2px solid #6e00ff !important; |
|
box-shadow: 0 0 15px rgba(110, 0, 255, 0.5); |
|
border-radius: 10px; |
|
padding: 20px; |
|
margin: 20px 0; |
|
} |
|
|
|
code { |
|
background-color: #1a1a1a !important; |
|
border-radius: 4px; |
|
padding: 0.2em 0.4em; |
|
color: #00ffff; |
|
} |
|
|
|
pre { |
|
background-color: #1a1a1a !important; |
|
border: 1px solid #333; |
|
border-radius: 8px; |
|
padding: 16px; |
|
} |
|
|
|
table { |
|
width: 100%; |
|
border-collapse: collapse; |
|
margin: 20px 0; |
|
background: rgba(0,0,0,0.2); |
|
table-layout: fixed; |
|
color: white; |
|
} |
|
|
|
th, td { |
|
border: 1px solid #333; |
|
padding: 12px; |
|
text-align: center; |
|
color: white; |
|
} |
|
|
|
th { |
|
background: rgba(110, 0, 255, 0.1); |
|
} |
|
|
|
td:nth-child(1) { |
|
width: 1%; |
|
white-space: nowrap; |
|
} |
|
|
|
td:nth-child(2) { |
|
width: 100%; |
|
} |
|
|
|
td > span { |
|
display: block; |
|
padding: 4px 8px; |
|
background: rgba(110, 0, 255, 0.1); |
|
border-radius: 4px; |
|
transition: all 0.3s ease; |
|
} |
|
|
|
td > span:hover { |
|
background: rgba(110, 0, 255, 0.2); |
|
transform: translateY(-1px); |
|
} |
|
|
|
a { |
|
color: #00ffff; |
|
text-decoration: none; |
|
transition: all 0.3s ease; |
|
} |
|
|
|
a:hover { |
|
color: #6e00ff; |
|
text-decoration: none; |
|
} |
|
|
|
hr { |
|
border: 0; |
|
height: 1px; |
|
background: linear-gradient(90deg, transparent, #333, transparent); |
|
margin: 40px 0; |
|
} |
|
|
|
img { |
|
max-width: 100%; |
|
border-radius: 10px; |
|
} |
|
|
|
details summary:hover { |
|
color: #00ffff; |
|
} |
|
|
|
* { |
|
color-scheme: dark !important; |
|
} |
|
|
|
.prose, .max-w-none, .px-4 { |
|
background-color: transparent !important; |
|
color: #c9d1d9 !important; |
|
} |
|
</style> |
|
<body> |
|
<div class="markdown-body"> |
|
<div align="center"> |
|
|
|
<img src="https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/o5WjJKA9f95ri9UzRxZQE.png" alt="Model Visualization" width="500" style="border: 3px solid #333; box-shadow: 0 0 15px rgba(66, 0, 131, 0.5);" /> |
|
|
|
<br> |
|
<br> |
|
|
|
<div style="font-size:1.5em; font-weight:bold; background: linear-gradient(45deg, #6e00ff, #00ffff); -webkit-background-clip: text; -webkit-text-fill-color: transparent;"> |
|
Hamanasu 15B R1 PT |
|
</div> |
|
|
|
</div> |
|
|
|
<div style="border:1px solid #333; border-radius:10px; padding:20px; margin:20px 0; background: rgba(0,0,0,0.4);"> |
|
|
|
## 🌌 Overview |
|
|
|
<i>This is the first pretraining run of Phi-4, trained on the following:</i> |
|
|
|
- `NewEden/Orion-LIT` |
|
|
|
<i>This model has *not* been instruct-tuned. Its ability to converse may be reduced compared to the original model. If you would like to roleplay, please use the Instruct version.</i> |
|
|
|
</div> |
|
|
|
<div style="border:2px solid #333; border-radius:10px; padding:20px; background: rgba(0,0,0,0.2);"> |
|
|
|
### ⚔️ Hardware & Training Details |
|
- 4x RTX 3090 GPUs |
|
- Epochs: 1 |
|
- Base: `Unsloth/phi-4` |
|
- Amount of Tokens: 500 Million |
|
</div> |
|
|
|
<div style="border: 2px solid #6e00ff; border-radius: 10px; padding: 20px; margin: 20px 0; box-shadow: 0 0 15px rgba(110, 0, 255, 0.5);"> |
|
|
|
## Axolotl Config ꒰(˶• ᴗ •˶)꒱ |
|
|
|
<details> |
|
|
|
```yaml |
|
base_model: unsloth_phi-4 |
|
model_type: AutoModelForCausalLM |
|
tokenizer_type: AutoTokenizer |
|
|
|
#hub_model_id: NewEden/Phi4-pretrain |
|
#hub_strategy: "all_checkpoints" |
|
#push_dataset_to_hub: |
|
#hf_use_auth_token: true |
|
|
|
plugins: |
|
- axolotl.integrations.liger.LigerPlugin |
|
liger_rope: true |
|
liger_rms_norm: true |
|
liger_swiglu: true |
|
liger_fused_linear_cross_entropy: true |
|
|
|
#plugins: |
|
# - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin |
|
|
|
#cut_cross_entropy: true |
|
|
|
load_in_8bit: false |
|
load_in_4bit: false |
|
strict: false |
|
|
|
datasets: |
|
- path: Mielikki/Erebus-87k |
|
type: completion |
|
field: body |
|
- path: NewEden/Orion-Asstr-Stories-16K |
|
type: completion |
|
field: content |
|
shuffle_merged_datasets: true |
|
dataset_prepared_path: prepared_data |
|
val_set_size: 0.0 |
|
output_dir: ./phi4-pt-out-r2 |
|
|
|
sequence_len: 16384 |
|
sample_packing: true |
|
pad_to_sequence_len: true |
|
|
|
adapter: lora |
|
lora_model_dir: |
|
lora_r: 128 |
|
lora_alpha: 16 |
|
lora_dropout: 0.05 |
|
lora_target_modules: |
|
- gate_proj |
|
- down_proj |
|
- up_proj |
|
- q_proj |
|
- v_proj |
|
- k_proj |
|
- o_proj |
|
|
|
lora_modules_to_save: |
|
- embed_tokens |
|
- lm_head |
|
|
|
|
|
wandb_project: mag-phi |
|
wandb_entity: |
|
wandb_watch: |
|
wandb_name: attempt-02 |
|
wandb_log_model: |
|
|
|
gradient_accumulation_steps: 4 |
|
micro_batch_size: 2 |
|
num_epochs: 1 |
|
optimizer: paged_ademamix_8bit |
|
lr_scheduler: cosine |
|
learning_rate: 0.00001 |
|
|
|
train_on_inputs: false |
|
group_by_length: false |
|
bf16: auto |
|
fp16: |
|
tf32: false |
|
|
|
gradient_checkpointing: unsloth |
|
early_stopping_patience: |
|
resume_from_checkpoint: |
|
local_rank: |
|
logging_steps: 1 |
|
xformers_attention: |
|
flash_attention: true |
|
|
|
warmup_steps: 10 |
|
evals_per_epoch: 4 |
|
eval_table_size: |
|
eval_max_new_tokens: 128 |
|
saves_per_epoch: 4 |
|
debug: |
|
deepspeed: /workspace/axolotl/deepspeed_configs/zero3_bf16_cpuoffload_params.json |
|
weight_decay: 0.01 |
|
fsdp: |
|
fsdp_config: |
|
``` |
|
|
|
</details> |
|
</div> |
|
|
|
|
|
<div align="center"> |
|
|
|
<div style="border: 2px solid #6e00ff; border-radius: 10px; padding: 20px; margin: 20px 0; box-shadow: 0 0 15px rgba(110, 0, 255, 0.5);"> |
|
|
|
## ⚡ Credits |
|
<div style="display: flex; justify-content: center;"> |
|
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 10px; margin: 20px 0; max-width: 600px;"> |
|
|
|
<div style="border:1px solid #333; padding:10px; border-radius:5px; text-align:center; background: rgba(0,0,0,0.2); display: flex; align-items: center; justify-content: center;"> |
|
<a href="https://huggingface.co/lucyknada"> |
|
<img src="https://img.shields.io/badge/%F0%9F%8C%9F-Lucy_Knada-blueviolet" alt="Lucy Knada"> |
|
</a> |
|
</div> |
|
|
|
<div style="border:1px solid #333; padding:10px; border-radius:5px; text-align:center; background: rgba(0,0,0,0.2); display: flex; align-items: center; justify-content: center;"> |
|
<a href="https://huggingface.co/jeiku"> |
|
<img src="https://img.shields.io/badge/%E2%9A%94%EF%B8%8F-jeiku-blueviolet" alt="jeiku"> |
|
</a> |
|
</div> |
|
|
|
<div style="border:1px solid #333; padding:10px; border-radius:5px; text-align:center; background: rgba(0,0,0,0.2); display: flex; align-items: center; justify-content: center;"> |
|
<a href="https://huggingface.co/intervitens"> |
|
<img src="https://img.shields.io/badge/%F0%9F%9B%A1%EF%B8%8F-Intervitens-blueviolet" alt="Intervitens"> |
|
</a> |
|
</div> |
|
|
|
<div style="border:1px solid #333; padding:10px; border-radius:5px; text-align:center; background: rgba(0,0,0,0.2); display: flex; align-items: center; justify-content: center;"> |
|
<a href="https://huggingface.co/kalomaze"> |
|
<img src="https://img.shields.io/badge/%F0%9F%94%AE-Kalomaze-blueviolet" alt="Kalomaze"> |
|
</a> |
|
</div> |
|
|
|
<div style="border:1px solid #333; padding:10px; border-radius:5px; text-align:center; background: rgba(0,0,0,0.2); display: flex; align-items: center; justify-content: center;"> |
|
<a href="https://huggingface.co/kubernetes-bad"> |
|
<img src="https://img.shields.io/badge/%E2%9A%A1-Kubernetes_Bad-blueviolet" alt="Kubernetes Bad"> |
|
</a> |
|
</div> |
|
|
|
<div style="border:1px solid #333; padding:10px; border-radius:5px; text-align:center; background: rgba(0,0,0,0.2); display: flex; align-items: center; justify-content: center;"> |
|
<a href="https://huggingface.co/anthracite-org"> |
|
<img src="https://img.shields.io/badge/%F0%9F%8C%91-Anthracite-blueviolet" alt="Anthracite"> |
|
</a> |
|
</div> |
|
</div> |
|
</div> |
|
</div> |
|
|
|
--- |
|
|
|
<div align="center"> |
|
<div style="font-size:0.8em; opacity:0.8;">Made by</div> |
|
<div style="font-size:1.2em; font-weight:bold; background: linear-gradient(45deg, #6e00ff, #00ffff); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">Delta-Vector</div> |
|
</div> |
|
|
|
</div> |
|
</div> |
|
</body> |
|
</html> |