jburtoft committed on
Commit
138dffe
·
1 Parent(s): 247cf38

Update README.md

Browse files

Initial commit with just notes

Files changed (1) hide show
  1. README.md +54 -0
README.md CHANGED
@@ -1,3 +1,57 @@
1
  ---
2
  license: llama2
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: llama2
3
  ---
4
+ Quick notes — what I did to get to this point
5
+
6
+ ```
7
+ from optimum.neuron import NeuronModelForCausalLM
8
+ from transformers import AutoTokenizer
9
+ model_id = "TencentARC/LLaMA-Pro-8B"
10
+ compiler_args = {"num_cores": 2, "auto_cast_type": "fp16"}
11
+ input_shapes = {"sequence_length": 2048, "batch_size": 2 }
12
+ llm = NeuronModelForCausalLM.from_pretrained(model_id, export=True, **input_shapes, **compiler_args)
13
+ save_directory = "Tencent_neuron"
14
+
15
+ llm.save_pretrained(save_directory)
16
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
17
+
18
+ tokenizer.save_pretrained(save_directory)
19
+
20
+ quit()
21
+ ```
22
+
23
+ ```
24
+ from optimum.neuron import pipeline
25
+
26
+ # Load pipeline from Hugging Face repository
27
+ save_directory = "Tencent_neuron"
28
+
29
+ pipe = pipeline("text-generation", save_directory)
30
+
31
+ # We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
32
+ messages = [
33
+ {"role": "user", "content": "What is 2+2?"},
34
+ ]
35
+ prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
36
+ # Run generation
37
+ outputs = pipe(prompt, max_new_tokens=2048, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
38
+ print(outputs[0]["generated_text"])
39
+
40
+ ```
41
+ ```
42
+ from huggingface_hub import login
43
+ from huggingface_hub import HfApi
44
+ api = HfApi()
45
+ login()
46
+
47
+
48
+ save_directory = "Tencent_neuron"
49
+
50
+ api.upload_folder(
51
+ folder_path=save_directory,
52
+ repo_id="jburtoft/TencentARC-LLaMA-Pro-8B-Neuron",
53
+ repo_type="model",
54
+ multi_commits=True,
55
+ multi_commits_verbose=True,
56
+ )
57
+ ```