Update README.md
README.md CHANGED
````diff
@@ -50,38 +50,49 @@ Quickly get inference running with the following required installation:
 Now, proceed as usual with HuggingFace:
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_name = "avemio/GRAG-PHI-3.5-MINI-4B-SFT-HESSIAN-AI"
 
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype="auto",
     device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+im_end_token_id = tokenizer.convert_tokens_to_ids('<|im_end|>')
+im_start_token_id = tokenizer.convert_tokens_to_ids('<|im_start|>')
 
 messages = [
-    {"role": "system", "content": ""},
-    {"role": "user", "content":
+    {"role": "system", "content": "Folge den Anweisungen des Benutzers. Bevor du deine finale Antwort gibst, schildere deine Überlegungen zur Lösung des Problems."},
+    {"role": "user", "content": "Ferdinand steht vor der Herausforderung, eine faire Besuchsregelung für seine drei Kinder zu finden, die den Bedürfnissen jedes einzelnen Kindes gerecht wird. Jedes Kind hat unterschiedliche Vorlieben und Bedürfnisse, die in den Besuchsplan integriert werden müssen. Er muss sicherstellen, dass die Regelung sowohl den Interessen der Kinder als auch den rechtlichen Vorgaben entspricht. Ferdinand hat eine Woche Zeit, um einen Vorschlag zu erarbeiten, den er mit seinem Anwalt besprechen kann."}
 ]
 text = tokenizer.apply_chat_template(
     messages,
     tokenize=False,
-    add_generation_prompt=
+    add_generation_prompt=False
 )
 model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
 generated_ids = model.generate(
     **model_inputs,
-    max_new_tokens
+    #max_new_tokens=-1,
+    max_length=2024,
+    temperature=0.01,
+    do_sample=False,
+    #bos_token_id=im_start_token_id,
+    eos_token_id=im_end_token_id,
+    pad_token_id=tokenizer.eos_token_id,
+    repetition_penalty=1.1,
+    num_return_sequences=1,
+    top_k=40,
+    top_p=0.95,
 )
 generated_ids = [
     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
 ]
 
 response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 ```
 
 ### [Processing Long Texts](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct#processing-long-texts)
````
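The updated snippet folds naturally into a small reusable helper. The sketch below is illustrative and not part of the model card: the `chat` function name is invented here, and it keeps only the settings that matter for the README's greedy decoding (with `do_sample=False`, the `temperature`, `top_k`, and `top_p` arguments have no effect, and transformers warns if they are passed). The README's German example prompts are glossed in English in the comments.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "avemio/GRAG-PHI-3.5-MINI-4B-SFT-HESSIAN-AI"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
im_end_token_id = tokenizer.convert_tokens_to_ids("<|im_end|>")

def chat(system_prompt: str, user_prompt: str, max_length: int = 2024) -> str:
    """Hypothetical helper wrapping the README's generation settings."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=False
    )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    output_ids = model.generate(
        **inputs,
        max_length=max_length,         # caps prompt + completion together
        do_sample=False,               # greedy decoding, as in the README
        eos_token_id=im_end_token_id,  # stop at the <|im_end|> marker
        pad_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.1,
    )
    # Drop the prompt tokens, keep only the newly generated ones.
    new_tokens = output_ids[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

# English gloss of the README's German example:
#   system: "Follow the user's instructions. Before giving your final answer,
#            describe your reasoning for solving the problem."
#   user:   Ferdinand must find a fair visitation arrangement for his three
#           children that meets each child's needs.
print(chat(
    "Folge den Anweisungen des Benutzers. Bevor du deine finale Antwort gibst, "
    "schildere deine Überlegungen zur Lösung des Problems.",
    "Ferdinand steht vor der Herausforderung, eine faire Besuchsregelung für "
    "seine drei Kinder zu finden, die den Bedürfnissen jedes einzelnen Kindes "
    "gerecht wird.",
))
```

Because `max_length` bounds prompt and completion together, a long input leaves correspondingly little room for the answer; `max_new_tokens` (commented out in the updated README) bounds only the completion.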