Organize logs inside session_logs/
Browse files- session_logs/cp_finetuning_hp.log +66 -0
- session_logs/evaluation_results.json +504 -0
- session_logs/hyperparameter_results.csv +17 -0
- session_logs/logs/events.out.tfevents.1740319079.c7b23710d8e3.3460.0 +3 -0
- session_logs/logs/events.out.tfevents.1740336746.c7b23710d8e3.3460.1 +3 -0
- session_logs/logs/events.out.tfevents.1740354238.c7b23710d8e3.3460.2 +3 -0
- session_logs/logs/events.out.tfevents.1740371740.c7b23710d8e3.3460.3 +3 -0
- session_logs/logs/events.out.tfevents.1740389236.c7b23710d8e3.3460.4 +3 -0
- session_logs/logs/events.out.tfevents.1740406717.c7b23710d8e3.3460.5 +3 -0
- session_logs/logs/events.out.tfevents.1740424200.c7b23710d8e3.3460.6 +3 -0
- session_logs/logs/events.out.tfevents.1740441669.c7b23710d8e3.3460.7 +3 -0
- session_logs/logs/events.out.tfevents.1740459145.c7b23710d8e3.3460.8 +3 -0
- session_logs/logs/events.out.tfevents.1740476626.c7b23710d8e3.3460.9 +3 -0
- session_logs/logs/events.out.tfevents.1740494119.c7b23710d8e3.3460.10 +3 -0
- session_logs/logs/events.out.tfevents.1740511606.c7b23710d8e3.3460.11 +3 -0
- session_logs/logs/events.out.tfevents.1740529136.c7b23710d8e3.3460.12 +3 -0
- session_logs/logs/events.out.tfevents.1740546623.c7b23710d8e3.3460.13 +3 -0
- session_logs/logs/events.out.tfevents.1740564106.c7b23710d8e3.3460.14 +3 -0
- session_logs/logs/events.out.tfevents.1740581607.c7b23710d8e3.3460.15 +3 -0
session_logs/cp_finetuning_hp.log
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2025-02-23 13:18:37,366 - Logging initialized for session: a2a8bc30-81cc-493a-b5d0-027703ef6644
|
2 |
+
2025-02-23 13:57:59,460 - Training with params:learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
|
3 |
+
2025-02-23 18:52:14,169 - Using default tokenizer.
|
4 |
+
2025-02-23 18:52:20,179 - Evaluation Results: {'meteor_scores': {'meteor': 0.13681055469862474}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.03265459149664401, 'precisions': [0.08790593505039193, 0.041336353340883356, 0.023482245131729668, 0.013325608342989572], 'brevity_penalty': 1.0, 'length_ratio': 3.5436507936507935, 'translation_length': 1786, 'reference_length': 504}, 'perplexity': 46824568.0}
|
5 |
+
2025-02-23 18:52:26,164 - Removed saved model artifacts from ./cont_pretrained_3_0.0002521627789110728
|
6 |
+
2025-02-23 18:52:26,165 - Training with params:learning_rate=0.00023424666142554082, gradient_accumulation_steps=3
|
7 |
+
2025-02-23 23:43:48,934 - Using default tokenizer.
|
8 |
+
2025-02-23 23:43:52,221 - Evaluation Results: {'meteor_scores': {'meteor': 0.15479365471627002}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.04584003595004448, 'precisions': [0.12137823022709475, 0.05807478122513922, 0.033144704931285365, 0.018898931799506986], 'brevity_penalty': 1.0, 'length_ratio': 2.5337301587301586, 'translation_length': 1277, 'reference_length': 504}, 'perplexity': 14256515.0}
|
9 |
+
2025-02-23 23:43:57,890 - Removed saved model artifacts from ./cont_pretrained_3_0.00023424666142554082
|
10 |
+
2025-02-23 23:43:57,902 - Training with params:learning_rate=0.0004038905726999131, gradient_accumulation_steps=2
|
11 |
+
2025-02-24 04:35:31,165 - Using default tokenizer.
|
12 |
+
2025-02-24 04:35:34,536 - Evaluation Results: {'meteor_scores': {'meteor': 0.15189724535656285}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.04147465437788019, 'rougeLsum': 0.04147465437788019}, 'bleu_scores': {'bleu': 0.04315852976676815, 'precisions': [0.11275964391691394, 0.054969879518072286, 0.03134556574923547, 0.017857142857142856], 'brevity_penalty': 1.0, 'length_ratio': 2.6746031746031744, 'translation_length': 1348, 'reference_length': 504}, 'perplexity': 8112177.0}
|
13 |
+
2025-02-24 04:35:39,861 - Removed saved model artifacts from ./cont_pretrained_2_0.0004038905726999131
|
14 |
+
2025-02-24 04:35:39,875 - Training with params:learning_rate=0.0007026402144219669, gradient_accumulation_steps=2
|
15 |
+
2025-02-24 09:27:07,661 - Using default tokenizer.
|
16 |
+
2025-02-24 09:27:10,994 - Evaluation Results: {'meteor_scores': {'meteor': 0.1562446210181405}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.049508728443907975, 'precisions': [0.1289228159457167, 0.06298533218291631, 0.035996488147497806, 0.020554066130473638], 'brevity_penalty': 1.0, 'length_ratio': 2.3392857142857144, 'translation_length': 1179, 'reference_length': 504}, 'perplexity': 5523488.0}
|
17 |
+
2025-02-24 09:27:16,330 - Removed saved model artifacts from ./cont_pretrained_2_0.0007026402144219669
|
18 |
+
2025-02-24 09:27:16,344 - Training with params:learning_rate=0.00020653097601095523, gradient_accumulation_steps=3
|
19 |
+
2025-02-24 14:18:29,370 - Using default tokenizer.
|
20 |
+
2025-02-24 14:18:32,711 - Evaluation Results: {'meteor_scores': {'meteor': 0.15908333848043738}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.05901458903655539, 'precisions': [0.1529175050301811, 0.07494866529774127, 0.0429769392033543, 0.02462526766595289], 'brevity_penalty': 1.0, 'length_ratio': 1.9722222222222223, 'translation_length': 994, 'reference_length': 504}, 'perplexity': 4067049.25}
|
21 |
+
2025-02-24 14:18:36,977 - Removed saved model artifacts from ./cont_pretrained_3_0.00020653097601095523
|
22 |
+
2025-02-24 14:18:36,988 - Training with params:learning_rate=0.0008565126838518531, gradient_accumulation_steps=4
|
23 |
+
2025-02-24 19:09:50,966 - Using default tokenizer.
|
24 |
+
2025-02-24 19:09:54,285 - Evaluation Results: {'meteor_scores': {'meteor': 0.16168163616406164}, 'rouge_scores': {'rouge1': 0.07004608294930877, 'rouge2': 0.02, 'rougeL': 0.05965647255969838, 'rougeLsum': 0.06000000000000001}, 'bleu_scores': {'bleu': 0.058454273860187325, 'precisions': [0.15376984126984128, 0.07388663967611336, 0.042355371900826444, 0.024261603375527425], 'brevity_penalty': 1.0, 'length_ratio': 2.0, 'translation_length': 1008, 'reference_length': 504}, 'perplexity': 4530443.0}
|
25 |
+
2025-02-24 19:09:59,453 - Removed saved model artifacts from ./cont_pretrained_4_0.0008565126838518531
|
26 |
+
2025-02-24 19:09:59,639 - Training with params:learning_rate=0.00029985537229988896, gradient_accumulation_steps=3
|
27 |
+
2025-02-25 00:01:00,201 - Using default tokenizer.
|
28 |
+
2025-02-25 00:01:03,486 - Evaluation Results: {'meteor_scores': {'meteor': 0.1585074778231058}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06491375147389095, 'precisions': [0.16685082872928178, 0.08248587570621468, 0.047398843930635835, 0.027218934911242602], 'brevity_penalty': 1.0, 'length_ratio': 1.7956349206349207, 'translation_length': 905, 'reference_length': 504}, 'perplexity': 4327195.5}
|
29 |
+
2025-02-25 00:01:08,838 - Removed saved model artifacts from ./cont_pretrained_3_0.00029985537229988896
|
30 |
+
2025-02-25 00:01:09,010 - Training with params:learning_rate=0.0008550094708042585, gradient_accumulation_steps=4
|
31 |
+
2025-02-25 04:52:16,738 - Using default tokenizer.
|
32 |
+
2025-02-25 04:52:20,045 - Evaluation Results: {'meteor_scores': {'meteor': 0.15646599253419569}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06783137441554227, 'precisions': [0.17321016166281755, 0.08628841607565012, 0.04963680387409201, 0.028535980148883373], 'brevity_penalty': 1.0, 'length_ratio': 1.7182539682539681, 'translation_length': 866, 'reference_length': 504}, 'perplexity': 4576987.0}
|
33 |
+
2025-02-25 04:52:25,047 - Removed saved model artifacts from ./cont_pretrained_4_0.0008550094708042585
|
34 |
+
2025-02-25 04:52:25,331 - Training with params:learning_rate=0.00015842823132994197, gradient_accumulation_steps=3
|
35 |
+
2025-02-25 09:43:36,419 - Using default tokenizer.
|
36 |
+
2025-02-25 09:43:39,722 - Evaluation Results: {'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06758865797310103, 'precisions': [0.17261219792865362, 0.08598351001177856, 0.04945717732207479, 0.02843016069221261], 'brevity_penalty': 1.0, 'length_ratio': 1.7242063492063493, 'translation_length': 869, 'reference_length': 504}, 'perplexity': 3687402.75}
|
37 |
+
2025-02-25 09:43:45,067 - Removed saved model artifacts from ./cont_pretrained_3_0.00015842823132994197
|
38 |
+
2025-02-25 09:43:45,407 - Training with params:learning_rate=0.0006671750787537489, gradient_accumulation_steps=2
|
39 |
+
2025-02-25 14:35:10,593 - Using default tokenizer.
|
40 |
+
2025-02-25 14:35:13,902 - Evaluation Results: {'meteor_scores': {'meteor': 0.15646599253419569}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06783137441554227, 'precisions': [0.17321016166281755, 0.08628841607565012, 0.04963680387409201, 0.028535980148883373], 'brevity_penalty': 1.0, 'length_ratio': 1.7182539682539681, 'translation_length': 866, 'reference_length': 504}, 'perplexity': 3413910.5}
|
41 |
+
2025-02-25 14:35:18,701 - Removed saved model artifacts from ./cont_pretrained_2_0.0006671750787537489
|
42 |
+
2025-02-25 14:35:19,090 - Training with params:learning_rate=0.0007681823532549075, gradient_accumulation_steps=3
|
43 |
+
2025-02-25 19:26:37,185 - Using default tokenizer.
|
44 |
+
2025-02-25 19:26:40,483 - Evaluation Results: {'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06958047742513221, 'precisions': [0.17751479289940827, 0.08848484848484849, 0.05093167701863354, 0.02929936305732484], 'brevity_penalty': 1.0, 'length_ratio': 1.6765873015873016, 'translation_length': 845, 'reference_length': 504}, 'perplexity': 3161555.0}
|
45 |
+
2025-02-25 19:26:45,452 - Removed saved model artifacts from ./cont_pretrained_3_0.0007681823532549075
|
46 |
+
2025-02-25 19:26:45,787 - Training with params:learning_rate=0.0006461830095508175, gradient_accumulation_steps=4
|
47 |
+
2025-02-26 00:18:46,498 - Using default tokenizer.
|
48 |
+
2025-02-26 00:18:49,809 - Evaluation Results: {'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06726772615452566, 'precisions': [0.1718213058419244, 0.08558030480656506, 0.04921968787515006, 0.028290282902829027], 'brevity_penalty': 1.0, 'length_ratio': 1.7321428571428572, 'translation_length': 873, 'reference_length': 504}, 'perplexity': 3629504.5}
|
49 |
+
2025-02-26 00:18:55,347 - Removed saved model artifacts from ./cont_pretrained_4_0.0006461830095508175
|
50 |
+
2025-02-26 00:18:55,605 - Training with params:learning_rate=0.0006627784604231771, gradient_accumulation_steps=2
|
51 |
+
2025-02-26 05:10:14,055 - Using default tokenizer.
|
52 |
+
2025-02-26 05:10:17,362 - Evaluation Results: {'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06958047742513221, 'precisions': [0.17751479289940827, 0.08848484848484849, 0.05093167701863354, 0.02929936305732484], 'brevity_penalty': 1.0, 'length_ratio': 1.6765873015873016, 'translation_length': 845, 'reference_length': 504}, 'perplexity': 3930782.75}
|
53 |
+
2025-02-26 05:10:22,805 - Removed saved model artifacts from ./cont_pretrained_2_0.0006627784604231771
|
54 |
+
2025-02-26 05:10:23,214 - Training with params:learning_rate=0.0006313414373568891, gradient_accumulation_steps=4
|
55 |
+
2025-02-26 10:01:37,686 - Using default tokenizer.
|
56 |
+
2025-02-26 10:01:41,018 - Evaluation Results: {'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06532898537563, 'precisions': [0.16703786191536749, 0.08314350797266515, 0.047785547785547784, 0.027446300715990454], 'brevity_penalty': 1.0, 'length_ratio': 1.7817460317460319, 'translation_length': 898, 'reference_length': 504}, 'perplexity': 3531264.0}
|
57 |
+
2025-02-26 10:01:45,714 - Removed saved model artifacts from ./cont_pretrained_4_0.0006313414373568891
|
58 |
+
2025-02-26 10:01:46,019 - Training with params:learning_rate=0.000556438434253926, gradient_accumulation_steps=3
|
59 |
+
2025-02-26 14:53:17,910 - Using default tokenizer.
|
60 |
+
2025-02-26 14:53:21,225 - Evaluation Results: {'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06502911340827192, 'precisions': [0.1662971175166297, 0.08276643990929705, 0.04756380510440835, 0.027315914489311165], 'brevity_penalty': 1.0, 'length_ratio': 1.7896825396825398, 'translation_length': 902, 'reference_length': 504}, 'perplexity': 3744938.5}
|
61 |
+
2025-02-26 14:53:26,536 - Removed saved model artifacts from ./cont_pretrained_3_0.000556438434253926
|
62 |
+
2025-02-26 14:53:26,908 - Training with params:learning_rate=0.0006375506890882657, gradient_accumulation_steps=4
|
63 |
+
2025-02-26 19:44:51,935 - Using default tokenizer.
|
64 |
+
2025-02-26 19:44:55,240 - Evaluation Results: {'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06563163671591424, 'precisions': [0.16778523489932887, 0.08352402745995423, 0.04800936768149883, 0.027577937649880094], 'brevity_penalty': 1.0, 'length_ratio': 1.7738095238095237, 'translation_length': 894, 'reference_length': 504}, 'perplexity': 4116836.5}
|
65 |
+
2025-02-26 19:44:59,657 - Removed saved model artifacts from ./cont_pretrained_4_0.0006375506890882657
|
66 |
+
2025-02-26 19:44:59,668 - Best hyperparameters: {'gradient_accumulation_steps': 3.407447233099833, 'learning_rate': 0.0007681823532549075, 'lr_scheduler_type': 0.6366548752851425, 'warmup_steps': 267.3313548397547}
|
session_logs/evaluation_results.json
ADDED
@@ -0,0 +1,504 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"1": {
|
3 |
+
"learning_rate": 0.0002521627789110728,
|
4 |
+
"gradient_accumulation_steps": 3,
|
5 |
+
"warmup_steps": 1539,
|
6 |
+
"lr_scheduler_type": "linear",
|
7 |
+
"eval_results": {
|
8 |
+
"meteor_scores": {
|
9 |
+
"meteor": 0.13681055469862474
|
10 |
+
},
|
11 |
+
"rouge_scores": {
|
12 |
+
"rouge1": 0.05056556346878928,
|
13 |
+
"rouge2": 0.02,
|
14 |
+
"rougeL": 0.041474654377880185,
|
15 |
+
"rougeLsum": 0.041474654377880185
|
16 |
+
},
|
17 |
+
"bleu_scores": {
|
18 |
+
"bleu": 0.03265459149664401,
|
19 |
+
"precisions": [
|
20 |
+
0.08790593505039193,
|
21 |
+
0.041336353340883356,
|
22 |
+
0.023482245131729668,
|
23 |
+
0.013325608342989572
|
24 |
+
],
|
25 |
+
"brevity_penalty": 1.0,
|
26 |
+
"length_ratio": 3.5436507936507935,
|
27 |
+
"translation_length": 1786,
|
28 |
+
"reference_length": 504
|
29 |
+
},
|
30 |
+
"perplexity": 46824568.0
|
31 |
+
}
|
32 |
+
},
|
33 |
+
"2": {
|
34 |
+
"learning_rate": 0.00023424666142554082,
|
35 |
+
"gradient_accumulation_steps": 3,
|
36 |
+
"warmup_steps": 840,
|
37 |
+
"lr_scheduler_type": "linear",
|
38 |
+
"eval_results": {
|
39 |
+
"meteor_scores": {
|
40 |
+
"meteor": 0.15479365471627002
|
41 |
+
},
|
42 |
+
"rouge_scores": {
|
43 |
+
"rouge1": 0.05056556346878928,
|
44 |
+
"rouge2": 0.02,
|
45 |
+
"rougeL": 0.041474654377880185,
|
46 |
+
"rougeLsum": 0.041474654377880185
|
47 |
+
},
|
48 |
+
"bleu_scores": {
|
49 |
+
"bleu": 0.04584003595004448,
|
50 |
+
"precisions": [
|
51 |
+
0.12137823022709475,
|
52 |
+
0.05807478122513922,
|
53 |
+
0.033144704931285365,
|
54 |
+
0.018898931799506986
|
55 |
+
],
|
56 |
+
"brevity_penalty": 1.0,
|
57 |
+
"length_ratio": 2.5337301587301586,
|
58 |
+
"translation_length": 1277,
|
59 |
+
"reference_length": 504
|
60 |
+
},
|
61 |
+
"perplexity": 14256515.0
|
62 |
+
}
|
63 |
+
},
|
64 |
+
"3": {
|
65 |
+
"learning_rate": 0.0004038905726999131,
|
66 |
+
"gradient_accumulation_steps": 2,
|
67 |
+
"warmup_steps": 475,
|
68 |
+
"lr_scheduler_type": "cosine",
|
69 |
+
"eval_results": {
|
70 |
+
"meteor_scores": {
|
71 |
+
"meteor": 0.15189724535656285
|
72 |
+
},
|
73 |
+
"rouge_scores": {
|
74 |
+
"rouge1": 0.05056556346878928,
|
75 |
+
"rouge2": 0.02,
|
76 |
+
"rougeL": 0.04147465437788019,
|
77 |
+
"rougeLsum": 0.04147465437788019
|
78 |
+
},
|
79 |
+
"bleu_scores": {
|
80 |
+
"bleu": 0.04315852976676815,
|
81 |
+
"precisions": [
|
82 |
+
0.11275964391691394,
|
83 |
+
0.054969879518072286,
|
84 |
+
0.03134556574923547,
|
85 |
+
0.017857142857142856
|
86 |
+
],
|
87 |
+
"brevity_penalty": 1.0,
|
88 |
+
"length_ratio": 2.6746031746031744,
|
89 |
+
"translation_length": 1348,
|
90 |
+
"reference_length": 504
|
91 |
+
},
|
92 |
+
"perplexity": 8112177.0
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"4": {
|
96 |
+
"learning_rate": 0.0007026402144219669,
|
97 |
+
"gradient_accumulation_steps": 2,
|
98 |
+
"warmup_steps": 549,
|
99 |
+
"lr_scheduler_type": "cosine",
|
100 |
+
"eval_results": {
|
101 |
+
"meteor_scores": {
|
102 |
+
"meteor": 0.1562446210181405
|
103 |
+
},
|
104 |
+
"rouge_scores": {
|
105 |
+
"rouge1": 0.05056556346878928,
|
106 |
+
"rouge2": 0.02,
|
107 |
+
"rougeL": 0.041474654377880185,
|
108 |
+
"rougeLsum": 0.041474654377880185
|
109 |
+
},
|
110 |
+
"bleu_scores": {
|
111 |
+
"bleu": 0.049508728443907975,
|
112 |
+
"precisions": [
|
113 |
+
0.1289228159457167,
|
114 |
+
0.06298533218291631,
|
115 |
+
0.035996488147497806,
|
116 |
+
0.020554066130473638
|
117 |
+
],
|
118 |
+
"brevity_penalty": 1.0,
|
119 |
+
"length_ratio": 2.3392857142857144,
|
120 |
+
"translation_length": 1179,
|
121 |
+
"reference_length": 504
|
122 |
+
},
|
123 |
+
"perplexity": 5523488.0
|
124 |
+
}
|
125 |
+
},
|
126 |
+
"5": {
|
127 |
+
"learning_rate": 0.00020653097601095523,
|
128 |
+
"gradient_accumulation_steps": 3,
|
129 |
+
"warmup_steps": 1802,
|
130 |
+
"lr_scheduler_type": "linear",
|
131 |
+
"eval_results": {
|
132 |
+
"meteor_scores": {
|
133 |
+
"meteor": 0.15908333848043738
|
134 |
+
},
|
135 |
+
"rouge_scores": {
|
136 |
+
"rouge1": 0.05056556346878928,
|
137 |
+
"rouge2": 0.02,
|
138 |
+
"rougeL": 0.041474654377880185,
|
139 |
+
"rougeLsum": 0.041474654377880185
|
140 |
+
},
|
141 |
+
"bleu_scores": {
|
142 |
+
"bleu": 0.05901458903655539,
|
143 |
+
"precisions": [
|
144 |
+
0.1529175050301811,
|
145 |
+
0.07494866529774127,
|
146 |
+
0.0429769392033543,
|
147 |
+
0.02462526766595289
|
148 |
+
],
|
149 |
+
"brevity_penalty": 1.0,
|
150 |
+
"length_ratio": 1.9722222222222223,
|
151 |
+
"translation_length": 994,
|
152 |
+
"reference_length": 504
|
153 |
+
},
|
154 |
+
"perplexity": 4067049.25
|
155 |
+
}
|
156 |
+
},
|
157 |
+
"6": {
|
158 |
+
"learning_rate": 0.0008565126838518531,
|
159 |
+
"gradient_accumulation_steps": 4,
|
160 |
+
"warmup_steps": 1982,
|
161 |
+
"lr_scheduler_type": "cosine",
|
162 |
+
"eval_results": {
|
163 |
+
"meteor_scores": {
|
164 |
+
"meteor": 0.16168163616406164
|
165 |
+
},
|
166 |
+
"rouge_scores": {
|
167 |
+
"rouge1": 0.07004608294930877,
|
168 |
+
"rouge2": 0.02,
|
169 |
+
"rougeL": 0.05965647255969838,
|
170 |
+
"rougeLsum": 0.06000000000000001
|
171 |
+
},
|
172 |
+
"bleu_scores": {
|
173 |
+
"bleu": 0.058454273860187325,
|
174 |
+
"precisions": [
|
175 |
+
0.15376984126984128,
|
176 |
+
0.07388663967611336,
|
177 |
+
0.042355371900826444,
|
178 |
+
0.024261603375527425
|
179 |
+
],
|
180 |
+
"brevity_penalty": 1.0,
|
181 |
+
"length_ratio": 2.0,
|
182 |
+
"translation_length": 1008,
|
183 |
+
"reference_length": 504
|
184 |
+
},
|
185 |
+
"perplexity": 4530443.0
|
186 |
+
}
|
187 |
+
},
|
188 |
+
"7": {
|
189 |
+
"learning_rate": 0.00029985537229988896,
|
190 |
+
"gradient_accumulation_steps": 3,
|
191 |
+
"warmup_steps": 0,
|
192 |
+
"lr_scheduler_type": "linear",
|
193 |
+
"eval_results": {
|
194 |
+
"meteor_scores": {
|
195 |
+
"meteor": 0.1585074778231058
|
196 |
+
},
|
197 |
+
"rouge_scores": {
|
198 |
+
"rouge1": 0.05056556346878928,
|
199 |
+
"rouge2": 0.02,
|
200 |
+
"rougeL": 0.041474654377880185,
|
201 |
+
"rougeLsum": 0.041474654377880185
|
202 |
+
},
|
203 |
+
"bleu_scores": {
|
204 |
+
"bleu": 0.06491375147389095,
|
205 |
+
"precisions": [
|
206 |
+
0.16685082872928178,
|
207 |
+
0.08248587570621468,
|
208 |
+
0.047398843930635835,
|
209 |
+
0.027218934911242602
|
210 |
+
],
|
211 |
+
"brevity_penalty": 1.0,
|
212 |
+
"length_ratio": 1.7956349206349207,
|
213 |
+
"translation_length": 905,
|
214 |
+
"reference_length": 504
|
215 |
+
},
|
216 |
+
"perplexity": 4327195.5
|
217 |
+
}
|
218 |
+
},
|
219 |
+
"8": {
|
220 |
+
"learning_rate": 0.0008550094708042585,
|
221 |
+
"gradient_accumulation_steps": 4,
|
222 |
+
"warmup_steps": 1803,
|
223 |
+
"lr_scheduler_type": "linear",
|
224 |
+
"eval_results": {
|
225 |
+
"meteor_scores": {
|
226 |
+
"meteor": 0.15646599253419569
|
227 |
+
},
|
228 |
+
"rouge_scores": {
|
229 |
+
"rouge1": 0.05056556346878928,
|
230 |
+
"rouge2": 0.02,
|
231 |
+
"rougeL": 0.041474654377880185,
|
232 |
+
"rougeLsum": 0.041474654377880185
|
233 |
+
},
|
234 |
+
"bleu_scores": {
|
235 |
+
"bleu": 0.06783137441554227,
|
236 |
+
"precisions": [
|
237 |
+
0.17321016166281755,
|
238 |
+
0.08628841607565012,
|
239 |
+
0.04963680387409201,
|
240 |
+
0.028535980148883373
|
241 |
+
],
|
242 |
+
"brevity_penalty": 1.0,
|
243 |
+
"length_ratio": 1.7182539682539681,
|
244 |
+
"translation_length": 866,
|
245 |
+
"reference_length": 504
|
246 |
+
},
|
247 |
+
"perplexity": 4576987.0
|
248 |
+
}
|
249 |
+
},
|
250 |
+
"9": {
|
251 |
+
"learning_rate": 0.00015842823132994197,
|
252 |
+
"gradient_accumulation_steps": 3,
|
253 |
+
"warmup_steps": 1716,
|
254 |
+
"lr_scheduler_type": "linear",
|
255 |
+
"eval_results": {
|
256 |
+
"meteor_scores": {
|
257 |
+
"meteor": 0.15645022285051025
|
258 |
+
},
|
259 |
+
"rouge_scores": {
|
260 |
+
"rouge1": 0.05056556346878928,
|
261 |
+
"rouge2": 0.02,
|
262 |
+
"rougeL": 0.041474654377880185,
|
263 |
+
"rougeLsum": 0.041474654377880185
|
264 |
+
},
|
265 |
+
"bleu_scores": {
|
266 |
+
"bleu": 0.06758865797310103,
|
267 |
+
"precisions": [
|
268 |
+
0.17261219792865362,
|
269 |
+
0.08598351001177856,
|
270 |
+
0.04945717732207479,
|
271 |
+
0.02843016069221261
|
272 |
+
],
|
273 |
+
"brevity_penalty": 1.0,
|
274 |
+
"length_ratio": 1.7242063492063493,
|
275 |
+
"translation_length": 869,
|
276 |
+
"reference_length": 504
|
277 |
+
},
|
278 |
+
"perplexity": 3687402.75
|
279 |
+
}
|
280 |
+
},
|
281 |
+
"10": {
|
282 |
+
"learning_rate": 0.0006671750787537489,
|
283 |
+
"gradient_accumulation_steps": 2,
|
284 |
+
"warmup_steps": 134,
|
285 |
+
"lr_scheduler_type": "linear",
|
286 |
+
"eval_results": {
|
287 |
+
"meteor_scores": {
|
288 |
+
"meteor": 0.15646599253419569
|
289 |
+
},
|
290 |
+
"rouge_scores": {
|
291 |
+
"rouge1": 0.05056556346878928,
|
292 |
+
"rouge2": 0.02,
|
293 |
+
"rougeL": 0.041474654377880185,
|
294 |
+
"rougeLsum": 0.041474654377880185
|
295 |
+
},
|
296 |
+
"bleu_scores": {
|
297 |
+
"bleu": 0.06783137441554227,
|
298 |
+
"precisions": [
|
299 |
+
0.17321016166281755,
|
300 |
+
0.08628841607565012,
|
301 |
+
0.04963680387409201,
|
302 |
+
0.028535980148883373
|
303 |
+
],
|
304 |
+
"brevity_penalty": 1.0,
|
305 |
+
"length_ratio": 1.7182539682539681,
|
306 |
+
"translation_length": 866,
|
307 |
+
"reference_length": 504
|
308 |
+
},
|
309 |
+
"perplexity": 3413910.5
|
310 |
+
}
|
311 |
+
},
|
312 |
+
"11": {
|
313 |
+
"learning_rate": 0.0007681823532549075,
|
314 |
+
"gradient_accumulation_steps": 3,
|
315 |
+
"warmup_steps": 267,
|
316 |
+
"lr_scheduler_type": "cosine",
|
317 |
+
"eval_results": {
|
318 |
+
"meteor_scores": {
|
319 |
+
"meteor": 0.15645022285051025
|
320 |
+
},
|
321 |
+
"rouge_scores": {
|
322 |
+
"rouge1": 0.05056556346878928,
|
323 |
+
"rouge2": 0.02,
|
324 |
+
"rougeL": 0.041474654377880185,
|
325 |
+
"rougeLsum": 0.041474654377880185
|
326 |
+
},
|
327 |
+
"bleu_scores": {
|
328 |
+
"bleu": 0.06958047742513221,
|
329 |
+
"precisions": [
|
330 |
+
0.17751479289940827,
|
331 |
+
0.08848484848484849,
|
332 |
+
0.05093167701863354,
|
333 |
+
0.02929936305732484
|
334 |
+
],
|
335 |
+
"brevity_penalty": 1.0,
|
336 |
+
"length_ratio": 1.6765873015873016,
|
337 |
+
"translation_length": 845,
|
338 |
+
"reference_length": 504
|
339 |
+
},
|
340 |
+
"perplexity": 3161555.0
|
341 |
+
}
|
342 |
+
},
|
343 |
+
"12": {
|
344 |
+
"learning_rate": 0.0006461830095508175,
|
345 |
+
"gradient_accumulation_steps": 4,
|
346 |
+
"warmup_steps": 1149,
|
347 |
+
"lr_scheduler_type": "linear",
|
348 |
+
"eval_results": {
|
349 |
+
"meteor_scores": {
|
350 |
+
"meteor": 0.15645022285051025
|
351 |
+
},
|
352 |
+
"rouge_scores": {
|
353 |
+
"rouge1": 0.05056556346878928,
|
354 |
+
"rouge2": 0.02,
|
355 |
+
"rougeL": 0.041474654377880185,
|
356 |
+
"rougeLsum": 0.041474654377880185
|
357 |
+
},
|
358 |
+
"bleu_scores": {
|
359 |
+
"bleu": 0.06726772615452566,
|
360 |
+
"precisions": [
|
361 |
+
0.1718213058419244,
|
362 |
+
0.08558030480656506,
|
363 |
+
0.04921968787515006,
|
364 |
+
0.028290282902829027
|
365 |
+
],
|
366 |
+
"brevity_penalty": 1.0,
|
367 |
+
"length_ratio": 1.7321428571428572,
|
368 |
+
"translation_length": 873,
|
369 |
+
"reference_length": 504
|
370 |
+
},
|
371 |
+
"perplexity": 3629504.5
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"13": {
|
375 |
+
"learning_rate": 0.0006627784604231771,
|
376 |
+
"gradient_accumulation_steps": 2,
|
377 |
+
"warmup_steps": 1269,
|
378 |
+
"lr_scheduler_type": "cosine",
|
379 |
+
"eval_results": {
|
380 |
+
"meteor_scores": {
|
381 |
+
"meteor": 0.15645022285051025
|
382 |
+
},
|
383 |
+
"rouge_scores": {
|
384 |
+
"rouge1": 0.05056556346878928,
|
385 |
+
"rouge2": 0.02,
|
386 |
+
"rougeL": 0.041474654377880185,
|
387 |
+
"rougeLsum": 0.041474654377880185
|
388 |
+
},
|
389 |
+
"bleu_scores": {
|
390 |
+
"bleu": 0.06958047742513221,
|
391 |
+
"precisions": [
|
392 |
+
0.17751479289940827,
|
393 |
+
0.08848484848484849,
|
394 |
+
0.05093167701863354,
|
395 |
+
0.02929936305732484
|
396 |
+
],
|
397 |
+
"brevity_penalty": 1.0,
|
398 |
+
"length_ratio": 1.6765873015873016,
|
399 |
+
"translation_length": 845,
|
400 |
+
"reference_length": 504
|
401 |
+
},
|
402 |
+
"perplexity": 3930782.75
|
403 |
+
}
|
404 |
+
},
|
405 |
+
"14": {
|
406 |
+
"learning_rate": 0.0006313414373568891,
|
407 |
+
"gradient_accumulation_steps": 4,
|
408 |
+
"warmup_steps": 1028,
|
409 |
+
"lr_scheduler_type": "linear",
|
410 |
+
"eval_results": {
|
411 |
+
"meteor_scores": {
|
412 |
+
"meteor": 0.15645022285051025
|
413 |
+
},
|
414 |
+
"rouge_scores": {
|
415 |
+
"rouge1": 0.05056556346878928,
|
416 |
+
"rouge2": 0.02,
|
417 |
+
"rougeL": 0.041474654377880185,
|
418 |
+
"rougeLsum": 0.041474654377880185
|
419 |
+
},
|
420 |
+
"bleu_scores": {
|
421 |
+
"bleu": 0.06532898537563,
|
422 |
+
"precisions": [
|
423 |
+
0.16703786191536749,
|
424 |
+
0.08314350797266515,
|
425 |
+
0.047785547785547784,
|
426 |
+
0.027446300715990454
|
427 |
+
],
|
428 |
+
"brevity_penalty": 1.0,
|
429 |
+
"length_ratio": 1.7817460317460319,
|
430 |
+
"translation_length": 898,
|
431 |
+
"reference_length": 504
|
432 |
+
},
|
433 |
+
"perplexity": 3531264.0
|
434 |
+
}
|
435 |
+
},
|
436 |
+
"15": {
|
437 |
+
"learning_rate": 0.000556438434253926,
|
438 |
+
"gradient_accumulation_steps": 3,
|
439 |
+
"warmup_steps": 671,
|
440 |
+
"lr_scheduler_type": "linear",
|
441 |
+
"eval_results": {
|
442 |
+
"meteor_scores": {
|
443 |
+
"meteor": 0.15645022285051025
|
444 |
+
},
|
445 |
+
"rouge_scores": {
|
446 |
+
"rouge1": 0.05056556346878928,
|
447 |
+
"rouge2": 0.02,
|
448 |
+
"rougeL": 0.041474654377880185,
|
449 |
+
"rougeLsum": 0.041474654377880185
|
450 |
+
},
|
451 |
+
"bleu_scores": {
|
452 |
+
"bleu": 0.06502911340827192,
|
453 |
+
"precisions": [
|
454 |
+
0.1662971175166297,
|
455 |
+
0.08276643990929705,
|
456 |
+
0.04756380510440835,
|
457 |
+
0.027315914489311165
|
458 |
+
],
|
459 |
+
"brevity_penalty": 1.0,
|
460 |
+
"length_ratio": 1.7896825396825398,
|
461 |
+
"translation_length": 902,
|
462 |
+
"reference_length": 504
|
463 |
+
},
|
464 |
+
"perplexity": 3744938.5
|
465 |
+
}
|
466 |
+
},
|
467 |
+
"16": {
|
468 |
+
"learning_rate": 0.0006375506890882657,
|
469 |
+
"gradient_accumulation_steps": 4,
|
470 |
+
"warmup_steps": 360,
|
471 |
+
"lr_scheduler_type": "cosine",
|
472 |
+
"eval_results": {
|
473 |
+
"meteor_scores": {
|
474 |
+
"meteor": 0.15645022285051025
|
475 |
+
},
|
476 |
+
"rouge_scores": {
|
477 |
+
"rouge1": 0.05056556346878928,
|
478 |
+
"rouge2": 0.02,
|
479 |
+
"rougeL": 0.041474654377880185,
|
480 |
+
"rougeLsum": 0.041474654377880185
|
481 |
+
},
|
482 |
+
"bleu_scores": {
|
483 |
+
"bleu": 0.06563163671591424,
|
484 |
+
"precisions": [
|
485 |
+
0.16778523489932887,
|
486 |
+
0.08352402745995423,
|
487 |
+
0.04800936768149883,
|
488 |
+
0.027577937649880094
|
489 |
+
],
|
490 |
+
"brevity_penalty": 1.0,
|
491 |
+
"length_ratio": 1.7738095238095237,
|
492 |
+
"translation_length": 894,
|
493 |
+
"reference_length": 504
|
494 |
+
},
|
495 |
+
"perplexity": 4116836.5
|
496 |
+
}
|
497 |
+
},
|
498 |
+
"best_param": {
|
499 |
+
"learning_rate": 0.0007681823532549075,
|
500 |
+
"gradient_accumulation_steps": 3,
|
501 |
+
"warmup_steps": 267,
|
502 |
+
"lr_scheduler_type": "cosine"
|
503 |
+
}
|
504 |
+
}
|
session_logs/hyperparameter_results.csv
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
learning_rate,gradient_accumulation_steps,warmup_steps,lr_scheduler_type,eval_results
|
2 |
+
0.0002521627789110728,3,1539,linear,"{'meteor_scores': {'meteor': 0.13681055469862474}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.03265459149664401, 'precisions': [0.08790593505039193, 0.041336353340883356, 0.023482245131729668, 0.013325608342989572], 'brevity_penalty': 1.0, 'length_ratio': 3.5436507936507935, 'translation_length': 1786, 'reference_length': 504}, 'perplexity': 46824568.0}"
|
3 |
+
0.00023424666142554082,3,840,linear,"{'meteor_scores': {'meteor': 0.15479365471627002}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.04584003595004448, 'precisions': [0.12137823022709475, 0.05807478122513922, 0.033144704931285365, 0.018898931799506986], 'brevity_penalty': 1.0, 'length_ratio': 2.5337301587301586, 'translation_length': 1277, 'reference_length': 504}, 'perplexity': 14256515.0}"
|
4 |
+
0.0004038905726999131,2,475,cosine,"{'meteor_scores': {'meteor': 0.15189724535656285}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.04147465437788019, 'rougeLsum': 0.04147465437788019}, 'bleu_scores': {'bleu': 0.04315852976676815, 'precisions': [0.11275964391691394, 0.054969879518072286, 0.03134556574923547, 0.017857142857142856], 'brevity_penalty': 1.0, 'length_ratio': 2.6746031746031744, 'translation_length': 1348, 'reference_length': 504}, 'perplexity': 8112177.0}"
|
5 |
+
0.0007026402144219669,2,549,cosine,"{'meteor_scores': {'meteor': 0.1562446210181405}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.049508728443907975, 'precisions': [0.1289228159457167, 0.06298533218291631, 0.035996488147497806, 0.020554066130473638], 'brevity_penalty': 1.0, 'length_ratio': 2.3392857142857144, 'translation_length': 1179, 'reference_length': 504}, 'perplexity': 5523488.0}"
|
6 |
+
0.00020653097601095523,3,1802,linear,"{'meteor_scores': {'meteor': 0.15908333848043738}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.05901458903655539, 'precisions': [0.1529175050301811, 0.07494866529774127, 0.0429769392033543, 0.02462526766595289], 'brevity_penalty': 1.0, 'length_ratio': 1.9722222222222223, 'translation_length': 994, 'reference_length': 504}, 'perplexity': 4067049.25}"
|
7 |
+
0.0008565126838518531,4,1982,cosine,"{'meteor_scores': {'meteor': 0.16168163616406164}, 'rouge_scores': {'rouge1': 0.07004608294930877, 'rouge2': 0.02, 'rougeL': 0.05965647255969838, 'rougeLsum': 0.06000000000000001}, 'bleu_scores': {'bleu': 0.058454273860187325, 'precisions': [0.15376984126984128, 0.07388663967611336, 0.042355371900826444, 0.024261603375527425], 'brevity_penalty': 1.0, 'length_ratio': 2.0, 'translation_length': 1008, 'reference_length': 504}, 'perplexity': 4530443.0}"
|
8 |
+
0.00029985537229988896,3,0,linear,"{'meteor_scores': {'meteor': 0.1585074778231058}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06491375147389095, 'precisions': [0.16685082872928178, 0.08248587570621468, 0.047398843930635835, 0.027218934911242602], 'brevity_penalty': 1.0, 'length_ratio': 1.7956349206349207, 'translation_length': 905, 'reference_length': 504}, 'perplexity': 4327195.5}"
|
9 |
+
0.0008550094708042585,4,1803,linear,"{'meteor_scores': {'meteor': 0.15646599253419569}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06783137441554227, 'precisions': [0.17321016166281755, 0.08628841607565012, 0.04963680387409201, 0.028535980148883373], 'brevity_penalty': 1.0, 'length_ratio': 1.7182539682539681, 'translation_length': 866, 'reference_length': 504}, 'perplexity': 4576987.0}"
|
10 |
+
0.00015842823132994197,3,1716,linear,"{'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06758865797310103, 'precisions': [0.17261219792865362, 0.08598351001177856, 0.04945717732207479, 0.02843016069221261], 'brevity_penalty': 1.0, 'length_ratio': 1.7242063492063493, 'translation_length': 869, 'reference_length': 504}, 'perplexity': 3687402.75}"
|
11 |
+
0.0006671750787537489,2,134,linear,"{'meteor_scores': {'meteor': 0.15646599253419569}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06783137441554227, 'precisions': [0.17321016166281755, 0.08628841607565012, 0.04963680387409201, 0.028535980148883373], 'brevity_penalty': 1.0, 'length_ratio': 1.7182539682539681, 'translation_length': 866, 'reference_length': 504}, 'perplexity': 3413910.5}"
|
12 |
+
0.0007681823532549075,3,267,cosine,"{'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06958047742513221, 'precisions': [0.17751479289940827, 0.08848484848484849, 0.05093167701863354, 0.02929936305732484], 'brevity_penalty': 1.0, 'length_ratio': 1.6765873015873016, 'translation_length': 845, 'reference_length': 504}, 'perplexity': 3161555.0}"
|
13 |
+
0.0006461830095508175,4,1149,linear,"{'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06726772615452566, 'precisions': [0.1718213058419244, 0.08558030480656506, 0.04921968787515006, 0.028290282902829027], 'brevity_penalty': 1.0, 'length_ratio': 1.7321428571428572, 'translation_length': 873, 'reference_length': 504}, 'perplexity': 3629504.5}"
|
14 |
+
0.0006627784604231771,2,1269,cosine,"{'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06958047742513221, 'precisions': [0.17751479289940827, 0.08848484848484849, 0.05093167701863354, 0.02929936305732484], 'brevity_penalty': 1.0, 'length_ratio': 1.6765873015873016, 'translation_length': 845, 'reference_length': 504}, 'perplexity': 3930782.75}"
|
15 |
+
0.0006313414373568891,4,1028,linear,"{'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06532898537563, 'precisions': [0.16703786191536749, 0.08314350797266515, 0.047785547785547784, 0.027446300715990454], 'brevity_penalty': 1.0, 'length_ratio': 1.7817460317460319, 'translation_length': 898, 'reference_length': 504}, 'perplexity': 3531264.0}"
|
16 |
+
0.000556438434253926,3,671,linear,"{'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06502911340827192, 'precisions': [0.1662971175166297, 0.08276643990929705, 0.04756380510440835, 0.027315914489311165], 'brevity_penalty': 1.0, 'length_ratio': 1.7896825396825398, 'translation_length': 902, 'reference_length': 504}, 'perplexity': 3744938.5}"
|
17 |
+
0.0006375506890882657,4,360,cosine,"{'meteor_scores': {'meteor': 0.15645022285051025}, 'rouge_scores': {'rouge1': 0.05056556346878928, 'rouge2': 0.02, 'rougeL': 0.041474654377880185, 'rougeLsum': 0.041474654377880185}, 'bleu_scores': {'bleu': 0.06563163671591424, 'precisions': [0.16778523489932887, 0.08352402745995423, 0.04800936768149883, 0.027577937649880094], 'brevity_penalty': 1.0, 'length_ratio': 1.7738095238095237, 'translation_length': 894, 'reference_length': 504}, 'perplexity': 4116836.5}"
|
session_logs/logs/events.out.tfevents.1740319079.c7b23710d8e3.3460.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa1b1c03fefc5c6bcdd3be2c00c3d0fba1c5eb02e111bfaf30b1a2db73dc1fca
|
3 |
+
size 851331
|
session_logs/logs/events.out.tfevents.1740336746.c7b23710d8e3.3460.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4161039262b4851a30202f6cdb352384fafb553c315869f2777c7079e080057f
|
3 |
+
size 851292
|
session_logs/logs/events.out.tfevents.1740354238.c7b23710d8e3.3460.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:442284b71b2006207fdb07afd2b74be0c3da337c9145f67fc0d907380e4c5aca
|
3 |
+
size 851290
|
session_logs/logs/events.out.tfevents.1740371740.c7b23710d8e3.3460.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:760ad1f7b9ef8c722cc3f16caa99f272b341618f503b1c2e30a6a3d9935407cc
|
3 |
+
size 851290
|
session_logs/logs/events.out.tfevents.1740389236.c7b23710d8e3.3460.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da1f6acd6ef386adb8dd4546e4249c42b86f35099331f21408c6472a553f77cf
|
3 |
+
size 851292
|
session_logs/logs/events.out.tfevents.1740406717.c7b23710d8e3.3460.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef11766c82a4c5211ad5626dd942f9f15b8a5d82cb1092a1e38c6c9d09955c4e
|
3 |
+
size 851290
|
session_logs/logs/events.out.tfevents.1740424200.c7b23710d8e3.3460.6
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:486480e702791b2fffca679a538d1cf42d6763270c6a2e118bb21a42a6745cd6
|
3 |
+
size 851292
|
session_logs/logs/events.out.tfevents.1740441669.c7b23710d8e3.3460.7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e3597dd291d7aad9c8bb7d0b7c9de10121a182121cdbebfe07f8d6d2e3e28d5
|
3 |
+
size 851290
|
session_logs/logs/events.out.tfevents.1740459145.c7b23710d8e3.3460.8
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dd574974a44ece7a3f97978cc65420c14da02fdeb139672b00fb65b043294cd
|
3 |
+
size 851292
|
session_logs/logs/events.out.tfevents.1740476626.c7b23710d8e3.3460.9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a90b8d52576c107dcf0dc525f3ac9b7e438e002818c96c76f3f36c1b70a5ad3a
|
3 |
+
size 851290
|
session_logs/logs/events.out.tfevents.1740494119.c7b23710d8e3.3460.10
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0016499d61fa491110022da91c500693998047e1917fe04ca9954e80e29e93d4
|
3 |
+
size 851290
|
session_logs/logs/events.out.tfevents.1740511606.c7b23710d8e3.3460.11
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a80c0d843d4663f4449fb8e862d1d53b66bcd71cad06e524220837a0fc831c0
|
3 |
+
size 851290
|
session_logs/logs/events.out.tfevents.1740529136.c7b23710d8e3.3460.12
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:974645fa6f18cfc9e7a784a4eea06f5fc0544e9c993b12b44daaa4ad8997723d
|
3 |
+
size 851290
|
session_logs/logs/events.out.tfevents.1740546623.c7b23710d8e3.3460.13
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc1f03efd0a022889842bdd132314620ef196ee1d4cebdab7eb3cf9da05a1497
|
3 |
+
size 851290
|
session_logs/logs/events.out.tfevents.1740564106.c7b23710d8e3.3460.14
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cf8130e1bb2983656f9a8120e16af5151a03f0b2a93fe52b1ead35131c61f1f
|
3 |
+
size 851288
|
session_logs/logs/events.out.tfevents.1740581607.c7b23710d8e3.3460.15
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:878970a34de66a086428478e437de6c04aad51421a6f4480cb5ecdf2c98353a0
|
3 |
+
size 851290
|