Kukedlc committed on
Commit d1ad328 · verified · 1 Parent(s): 3b8fa08

Update README.md

Files changed (1)
  1. README.md +34 -7
README.md CHANGED
@@ -53,27 +53,54 @@ parameters:
  dtype: bfloat16
  ```

- ## 💻 Usage

  ```python
- !pip install -qU transformers accelerate

  from transformers import AutoTokenizer
  import transformers
  import torch

  model = "Kukedlc/NeuralShiva-7B-DT"
- messages = [{"role": "user", "content": "What is a large language model?"}]

  tokenizer = AutoTokenizer.from_pretrained(model)
- prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
  pipeline = transformers.pipeline(
      "text-generation",
      model=model,
-     torch_dtype=torch.float16,
-     device_map="auto",
  )

  outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
  print(outputs[0]["generated_text"])
- ```
  dtype: bfloat16
  ```

+
+ ## 💻 Usage - Stream
+
+ ```python
+ # Requirements
+ !pip install -qU transformers accelerate bitsandbytes
+
+ # Imports & settings
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+ import warnings
+ import os
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ warnings.filterwarnings('ignore')
+
+ # Model & Tokenizer (loaded in 4-bit via bitsandbytes)
+ MODEL_NAME = "Kukedlc/NeuralShiva-7B-DT"
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map='cuda:1', load_in_4bit=True)
+ tok = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+ # Inference (inputs must live on the same device as the model)
+ prompt = "I want you to generate a theory that unites quantum mechanics with the theory of relativity and cosmic consciousness"
+ inputs = tok([prompt], return_tensors="pt").to(model.device)
+ streamer = TextStreamer(tok)
+
+ # Despite returning the usual output, the streamer also prints the generated text to stdout.
+ _ = model.generate(**inputs, streamer=streamer, max_new_tokens=512, do_sample=True, num_beams=1, top_p=0.9, temperature=0.7)
+ ```
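
`TextStreamer` above prints straight to stdout. To consume the stream programmatically instead, a minimal sketch using transformers' `TextIteratorStreamer` (the same model and 4-bit setup as above are assumed; `generate` runs in a worker thread because it blocks):

```python
# Sketch: iterate over generated text chunks with TextIteratorStreamer.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_NAME = "Kukedlc/NeuralShiva-7B-DT"
tok = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto", load_in_4bit=True)

inputs = tok(["What is a Mixture of Experts?"], return_tensors="pt").to(model.device)
# skip_prompt=True yields only newly generated text, not the echoed prompt.
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until finished, so run it in a thread and consume the stream here.
thread = Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=256))
thread.start()
for text_chunk in streamer:
    print(text_chunk, end="", flush=True)
thread.join()
```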
+ ## 💻 Usage - Classic

  ```python
+ !pip install -qU transformers bitsandbytes accelerate

  from transformers import AutoTokenizer
  import transformers
  import torch

  model = "Kukedlc/NeuralShiva-7B-DT"

  tokenizer = AutoTokenizer.from_pretrained(model)
  pipeline = transformers.pipeline(
      "text-generation",
      model=model,
+     model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
  )

+ messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
  outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
  print(outputs[0]["generated_text"])
+ ```
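
With the classic pipeline, `outputs[0]["generated_text"]` echoes the chat-templated prompt followed by the completion. A minimal follow-up sketch (reusing the `pipeline` and `prompt` objects from the block above) that returns only the new tokens via the text-generation pipeline's `return_full_text` flag:

```python
# return_full_text=False strips the prompt, leaving only the generated continuation.
outputs = pipeline(prompt, max_new_tokens=256, do_sample=True,
                   temperature=0.7, top_k=50, top_p=0.95, return_full_text=False)
print(outputs[0]["generated_text"])
```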