ssands1979 committed · verified
Commit 1ef2a23 · 1 Parent(s): 971e0e9

Upload folder using huggingface_hub

Files changed (2):
1. README.md +84 -0
2. config.json +40 -0
README.md ADDED
@@ -0,0 +1,84 @@
---
license: apache-2.0
tags:
- moe
- frankenmoe
- merge
- mergekit
- lazymergekit
- lxuechen/phi-2-sft
- mrm8488/phi-2-coder
- Walmart-the-bag/phi-2-uncensored
- ArtifactAI/phi-2-arxiv-physics-instruct
base_model:
- lxuechen/phi-2-sft
- mrm8488/phi-2-coder
- Walmart-the-bag/phi-2-uncensored
- ArtifactAI/phi-2-arxiv-physics-instruct
---

# FrankenPhi2-4x

FrankenPhi2-4x is a Mixture of Experts (MoE) built from the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
* [lxuechen/phi-2-sft](https://huggingface.co/lxuechen/phi-2-sft)
* [mrm8488/phi-2-coder](https://huggingface.co/mrm8488/phi-2-coder)
* [Walmart-the-bag/phi-2-uncensored](https://huggingface.co/Walmart-the-bag/phi-2-uncensored)
* [ArtifactAI/phi-2-arxiv-physics-instruct](https://huggingface.co/ArtifactAI/phi-2-arxiv-physics-instruct)

## 🧩 Configuration

```yaml
base_model: microsoft/phi-2
experts:
  - source_model: lxuechen/phi-2-sft
    positive_prompts:
      - "chat"
      - "assistant"
      - "tell me"
      - "explain"
  - source_model: mrm8488/phi-2-coder
    positive_prompts:
      - "code"
      - "python"
      - "javascript"
      - "programming"
      - "algorithm"
  - source_model: Walmart-the-bag/phi-2-uncensored
    positive_prompts:
      - "storywriting"
      - "write"
      - "scene"
      - "story"
      - "character"
  - source_model: ArtifactAI/phi-2-arxiv-physics-instruct
    positive_prompts:
      - "physics"
      - "math"
      - "mathematics"
      - "solve"
      - "count"
```
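
How does a merge with no router training know which expert to pick? Under mergekit's default `hidden` gate mode, each expert's `positive_prompts` are run through the base model, and their hidden-state representations seed that expert's row of the router matrix, so tokens that resemble the prompts score highest for that expert. The sketch below illustrates the idea only; it is not mergekit's actual code, and the function and variable names are hypothetical:

```python
# Illustrative sketch of prompt-based gate initialization (hypothetical names,
# not mergekit's real API). Assumes the default "hidden" gate mode.
import torch
from transformers import AutoModel, AutoTokenizer

base = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(base)
model = AutoModel.from_pretrained(base)
model.eval()

def gate_vector(prompts):
    """Average the base model's last hidden state over each prompt, then over prompts."""
    vecs = []
    for text in prompts:
        inputs = tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            hidden = model(**inputs).last_hidden_state  # (1, seq_len, hidden_size)
        vecs.append(hidden.mean(dim=1).squeeze(0))      # (hidden_size,)
    return torch.stack(vecs).mean(dim=0)

experts = {
    "lxuechen/phi-2-sft": ["chat", "assistant", "tell me", "explain"],
    "mrm8488/phi-2-coder": ["code", "python", "javascript", "programming", "algorithm"],
    "Walmart-the-bag/phi-2-uncensored": ["storywriting", "write", "scene", "story", "character"],
    "ArtifactAI/phi-2-arxiv-physics-instruct": ["physics", "math", "mathematics", "solve", "count"],
}

# One row per expert; at inference the router scores each token's hidden state
# against these rows and routes the token to the top-scoring experts.
router_weight = torch.stack([gate_vector(p) for p in experts.values()])
print(router_weight.shape)  # torch.Size([4, 2560])
```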

## 💻 Usage

```python
# Install dependencies (notebook syntax; drop the "!" in a shell)
!pip install -qU transformers bitsandbytes accelerate

from transformers import AutoTokenizer
import transformers
import torch

model = "ssands1979/FrankenPhi2-4x"

tokenizer = AutoTokenizer.from_pretrained(model)
# load_in_4bit quantizes the weights via bitsandbytes so the 4-expert model
# fits on a single consumer GPU
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
)

# Build a chat-formatted prompt, then sample a response
messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])
```
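
Because the config below declares a `MixtralForCausalLM` with `num_experts_per_tok: 2`, the routing can in principle be inspected at inference time. A hedged sketch, assuming the checkpoint really loads as the standard Mixtral architecture its config names (the prompt here is arbitrary):

```python
# Hedged sketch: peek at per-token expert routing, assuming the checkpoint
# loads as the MixtralForCausalLM declared in its config.json.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "ssands1979/FrankenPhi2-4x"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.float16)

inputs = tokenizer("def quicksort(arr):", return_tensors="pt")
with torch.no_grad():
    out = model(**inputs, output_router_logits=True)

# out.router_logits: one (num_tokens, num_local_experts) tensor per MoE layer.
# The top-2 indices per token are the experts actually used (num_experts_per_tok: 2).
print(out.router_logits[0].topk(2, dim=-1).indices)
```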
config.json ADDED
@@ -0,0 +1,40 @@
{
  "_name_or_path": "microsoft/phi-2",
  "architectures": [
    "MixtralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/phi-2--configuration_phi.PhiConfig",
    "AutoModelForCausalLM": "microsoft/phi-2--modeling_phi.PhiForCausalLM"
  },
  "bos_token_id": null,
  "embd_pdrop": 0.0,
  "eos_token_id": null,
  "hidden_act": "gelu_new",
  "hidden_size": 2560,
  "initializer_range": 0.02,
  "intermediate_size": 10240,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 2048,
  "model_type": "mixtral",
  "num_attention_heads": 32,
  "num_experts_per_tok": 2,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "num_local_experts": 4,
  "output_router_logits": false,
  "partial_rotary_factor": 0.4,
  "qk_layernorm": false,
  "resid_pdrop": 0.1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "router_aux_loss_coef": 0.001,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.37.0",
  "use_cache": true,
  "vocab_size": 51200
}
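
The MoE-specific fields are `num_local_experts: 4` (the four source models) and `num_experts_per_tok: 2` (two experts are active per token). A back-of-the-envelope sketch of the expert parameter budget these values imply, assuming phi-2's two-matrix MLP (fc1/fc2) per expert and ignoring attention, embeddings, and the router; illustrative only:

```python
# Rough, hypothetical estimate of expert-MLP parameters from the config above.
# Assumes a two-matrix MLP (fc1: hidden -> intermediate, fc2: intermediate -> hidden).
hidden_size = 2560
intermediate_size = 10240
num_hidden_layers = 32
num_local_experts = 4
num_experts_per_tok = 2

per_expert = 2 * hidden_size * intermediate_size           # fc1 + fc2 weights
total = num_hidden_layers * num_local_experts * per_expert
active = num_hidden_layers * num_experts_per_tok * per_expert
print(f"{total / 1e9:.2f}B expert params total, {active / 1e9:.2f}B active per token")
# -> 6.71B expert params total, 3.36B active per token
```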