Commit · 09d8da8
Parent(s): d4a7a03
Update app.py
app.py CHANGED
@@ -13,7 +13,7 @@ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
flores = load_dataset("facebook/flores", "eng_Latn-ukr_Cyrl")
dataset = flores[subset]

-fig, (axl, axr) = plt.subplots(1, 2, figsize=(10,
+fig, (axl, axr) = plt.subplots(1, 2, figsize=(10,3))
axl.hist(dataset.map(lambda x: {'num_tokens':len(tokenizer(x['sentence_eng_Latn'])['input_ids'])})['num_tokens'])
axl.set_title('eng mistral tokens')
axr.hist(dataset.map(lambda x: {'num_tokens':len(tokenizer(x['sentence_ukr_Cyrl'])['input_ids'])})['num_tokens'])
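For context, a minimal self-contained sketch of what this section of app.py does after the fix. The diff does not show where `subset` is defined or which imports precede the hunk, so `subset = "dev"` and the standard imports are assumptions here; everything else mirrors the changed lines.

# Sketch of the changed section; `subset = "dev"` and the imports are assumed,
# since they fall outside the hunk shown in the diff.
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

subset = "dev"  # assumption: not visible in the diff
flores = load_dataset("facebook/flores", "eng_Latn-ukr_Cyrl")
dataset = flores[subset]

# The commit closes the figsize tuple: (10, 3) gives two side-by-side panels.
fig, (axl, axr) = plt.subplots(1, 2, figsize=(10, 3))

# Histogram of per-sentence token counts under the Mistral tokenizer,
# English (left panel) vs. Ukrainian (right panel).
axl.hist(dataset.map(lambda x: {'num_tokens': len(tokenizer(x['sentence_eng_Latn'])['input_ids'])})['num_tokens'])
axl.set_title('eng mistral tokens')
axr.hist(dataset.map(lambda x: {'num_tokens': len(tokenizer(x['sentence_ukr_Cyrl'])['input_ids'])})['num_tokens'])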