avemio-digital committed on
Commit
ae3ac50
verified
1 Parent(s): 6520b6e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +22 -11
README.md CHANGED
@@ -50,38 +50,49 @@ Quickly get inference running with the following required installation:
50
  Now, proceed as usual with HuggingFace:
51
  ```python
52
  from transformers import AutoModelForCausalLM, AutoTokenizer
53
-
54
  model_name = "avemio/GRAG-PHI-3.5-MINI-4B-SFT-HESSIAN-AI"
55
-
56
  model = AutoModelForCausalLM.from_pretrained(
57
  model_name,
58
  torch_dtype="auto",
59
  device_map="auto"
60
  )
61
  tokenizer = AutoTokenizer.from_pretrained(model_name)
62
-
63
- prompt = "Folge den Anweisungen des Benutzers. Bevor du deine finale Antwort gibst, schildere deine Überlegungen zur Lösung des Problems."
 
64
  messages = [
65
- {"role": "system", "content": ""},
66
- {"role": "user", "content": prompt}
67
  ]
68
  text = tokenizer.apply_chat_template(
69
  messages,
70
  tokenize=False,
71
- add_generation_prompt=True
72
  )
73
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
74
-
75
  generated_ids = model.generate(
76
  **model_inputs,
77
- max_new_tokens=512
 
 
 
 
 
 
 
 
 
 
78
  )
79
  generated_ids = [
80
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
81
  ]
82
-
83
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
84
-
85
  ```
86
 
87
  ### [](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct#processing-long-texts)
 
50
  Now, proceed as usual with HuggingFace:
51
  ```python
52
  from transformers import AutoModelForCausalLM, AutoTokenizer
53
+
54
  model_name = "avemio/GRAG-PHI-3.5-MINI-4B-SFT-HESSIAN-AI"
55
+
56
  model = AutoModelForCausalLM.from_pretrained(
57
  model_name,
58
  torch_dtype="auto",
59
  device_map="auto"
60
  )
61
  tokenizer = AutoTokenizer.from_pretrained(model_name)
62
+ im_end_token_id = tokenizer.convert_tokens_to_ids('<|im_end|>')
63
+ im_start_token_id = tokenizer.convert_tokens_to_ids('<|im_start|>')
64
+
65
  messages = [
66
+ {"role": "system", "content": "Folge den Anweisungen des Benutzers. Bevor du deine finale Antwort gibst, schildere deine Überlegungen zur Lösung des Problems."},
67
+ {"role": "user", "content": "Ferdinand steht vor der Herausforderung, eine faire Besuchsregelung für seine drei Kinder zu finden, die den Bedürfnissen jedes einzelnen Kindes gerecht wird. Jedes Kind hat unterschiedliche Vorlieben und Bedürfnisse, die in den Besuchsplan integriert werden müssen. Er muss sicherstellen, dass die Regelung sowohl den Interessen der Kinder als auch den rechtlichen Vorgaben entspricht. Ferdinand hat eine Woche Zeit, um einen Vorschlag zu erarbeiten, den er mit seinem Anwalt besprechen kann."}
68
  ]
69
  text = tokenizer.apply_chat_template(
70
  messages,
71
  tokenize=False,
72
+ add_generation_prompt=False
73
  )
74
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
75
+
76
  generated_ids = model.generate(
77
  **model_inputs,
78
+ #max_new_tokens=-1,
79
+ max_length=2024,
80
+ temperature=0.01,
81
+ do_sample=False,
82
+ #bos_token_id=im_start_token_id,
83
+ eos_token_id=im_end_token_id,
84
+ pad_token_id=tokenizer.eos_token_id,
85
+ repetition_penalty=1.1,
86
+ num_return_sequences=1,
87
+ top_k=40,
88
+ top_p=0.95,
89
  )
90
  generated_ids = [
91
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
92
  ]
93
+
94
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
95
+
96
  ```
97
 
98
  ### [](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct#processing-long-texts)