Update README.md
Browse files
README.md
CHANGED
|
@@ -5,7 +5,7 @@ datasets:
|
|
| 5 |
- uonlp/CulturaX
|
| 6 |
- pg19
|
| 7 |
- bigcode/starcoderdata
|
| 8 |
-
-
|
| 9 |
language:
|
| 10 |
- fr
|
| 11 |
- en
|
|
@@ -65,7 +65,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
| 65 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
|
| 66 |
|
| 67 |
inputs = tokenizer("I am so tired I could sleep right now. -> Je suis si fatigué que je pourrais m'endormir maintenant.\nHe is heading to the market. -> Il va au marché.\nWe are running on the beach. ->", return_tensors="pt").to(model.device)
|
| 68 |
-
tokens = model.generate(**inputs, max_length=100, do_sample=True, top_p=0.95, top_k=60, temperature=0.
|
| 69 |
print(tokenizer.decode(tokens[0]))
|
| 70 |
|
| 71 |
# remove bos token
|
|
|
|
| 5 |
- uonlp/CulturaX
|
| 6 |
- pg19
|
| 7 |
- bigcode/starcoderdata
|
| 8 |
+
- croissantllm/croissant_dataset
|
| 9 |
language:
|
| 10 |
- fr
|
| 11 |
- en
|
|
|
|
| 65 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
|
| 66 |
|
| 67 |
inputs = tokenizer("I am so tired I could sleep right now. -> Je suis si fatigué que je pourrais m'endormir maintenant.\nHe is heading to the market. -> Il va au marché.\nWe are running on the beach. ->", return_tensors="pt").to(model.device)
|
| 68 |
+
tokens = model.generate(**inputs, max_length=100, do_sample=True, top_p=0.95, top_k=60, temperature=0.3)
|
| 69 |
print(tokenizer.decode(tokens[0]))
|
| 70 |
|
| 71 |
# remove bos token
|