Initial commit for mix_d6_l3_h4_t40K_s372001
Browse files- behaviors.json +1 -0
- features.json +1 -0
- model.pth +3 -0
- training_loss.json +0 -0
behaviors.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"position": 7, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:18", "Impact:A76", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P7=96", "Attn:P0=4"]}, {"position": 7, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:24", "Impact:A76", "Math.Add:S12345", "Math.Sub:M012", "Math.Neg:N1234", "Attn:P0=94", "Attn:P7=4", "Attn:P6=1"]}, {"position": 7, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:15", "Impact:A76", "Math.Add:S012345", "Math.Neg:N1"]}, {"position": 8, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:23", "Impact:A765", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P8=100"]}, {"position": 8, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:25", "Impact:A7654", "Math.Add:S012345", "Math.Sub:M0", "Math.Neg:N1234", "Attn:P1=94", "Attn:P8=3"]}, {"position": 8, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:20", "Impact:A765", "Math.Add:S012345", "Math.Sub:M123", "Math.Neg:N12"]}, {"position": 8, "layer": 1, "is_head": true, "num": 0, "tags": ["Fail%:0", "Impact:A6", "Math.Add:S2", "Attn:P6=32", "Attn:P8=24", "Attn:P4=12", "Attn:P3=10", "Math.Add:A4.SP"]}, {"position": 8, "layer": 1, "is_head": false, "num": 0, "tags": ["Fail%:0", "Impact:A65", "Math.Add:S12"]}, {"position": 9, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:31", "Impact:A7654", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P9=99"]}, {"position": 9, "layer": 0, "is_head": true, "num": 1, "tags": ["Fail%:0", "Impact:A7", "Math.Neg:N1", "Attn:P6=54", "Attn:P9=24", "Attn:P2=9", "Attn:P8=3"]}, {"position": 9, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:33", "Impact:A7654", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P2=95", "Attn:P9=2"]}, {"position": 9, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:27", "Impact:A7654", "Math.Add:S12345", "Math.Sub:M123", "Math.Neg:N12"]}, {"position": 10, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:32", "Impact:A76543", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P10=100"]}, {"position": 10, "layer": 0, "is_head": true, "num": 1, "tags": ["Fail%:0", "Impact:A7", "Math.Neg:N1", "Attn:P6=41", "Attn:P10=34", "Attn:P3=11", "Attn:P9=4"]}, {"position": 10, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:31", "Impact:A76543", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P3=95", "Attn:P10=1"]}, {"position": 10, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:25", "Impact:A76543", "Math.Add:S12345", "Math.Sub:M12", "Math.Neg:N12"]}, {"position": 11, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:32", "Impact:A7654321", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N234", "Attn:P11=99"]}, {"position": 11, "layer": 0, "is_head": true, "num": 1, "tags": ["Fail%:0", "Impact:A7", "Math.Neg:N2", "Attn:P6=36", "Attn:P11=17", "Attn:P4=11", "Attn:P10=10"]}, {"position": 11, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:27", "Impact:A7654321", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P4=98"]}, {"position": 11, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:22", "Impact:A765432", "Math.Add:S012345", "Math.Sub:M123", "Math.Neg:N12"]}, {"position": 11, "layer": 1, "is_head": true, "num": 0, "tags": ["Fail%:0", "Impact:A7", "Math.Sub:M2", "Attn:P6=24", "Attn:P11=24", "Attn:P10=13", "Attn:P3=7", "Math.Add:A1.SP"]}, {"position": 12, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:20", "Impact:A7654321", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1", "Attn:P12=89", "Attn:P6=5", "Attn:P11=2"]}, {"position": 12, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:22", "Impact:A7654321", "Math.Add:S012345", "Math.Sub:M0", "Math.Neg:N1234", "Attn:P5=93", "Attn:P12=2"]}, {"position": 12, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:15", "Impact:A7654321", "Math.Add:S12345", "Math.Sub:M023", "Math.Neg:N12"]}, {"position": 12, "layer": 1, "is_head": true, "num": 0, "tags": ["Fail%:0", "Impact:A7", "Math.Sub:M2", "Attn:P6=51", "Attn:P12=24", "Attn:P11=14", "Attn:P10=5", "Math.Add:A0.SP"]}, {"position": 12, "layer": 1, "is_head": false, "num": 0, "tags": ["Fail%:1", "Impact:A432", "Math.Add:S345", "Math.Sub:M0"]}, {"position": 13, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:1", "Impact:A7", "Math.Neg:N12", "Attn:P6=98"]}, {"position": 13, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:6", "Impact:A7", "Math.Neg:N1234"]}, {"position": 13, "layer": 1, "is_head": true, "num": 1, "tags": ["Math.Add:A1.SP"]}, {"position": 14, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:3", "Impact:A60", "Math.Sub:M0123", "Attn:P7=50", "Attn:P0=22", "Attn:P6=16", "Attn:P14=6"]}, {"position": 14, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:2", "Impact:A64320", "Math.Sub:M0123"]}, {"position": 14, "layer": 2, "is_head": true, "num": 1, "tags": ["Fail%:2", "Impact:A6", "Math.Add:S01235", "Attn:P7=50", "Attn:P8=17", "Attn:P9=9", "Attn:P12=7", "Math.Add:A5.SP"]}, {"position": 14, "layer": 2, "is_head": false, "num": 0, "tags": ["Fail%:2", "Impact:A6", "Math.Add:S345"]}, {"position": 15, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:49", "Impact:A5", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P7=63", "Attn:P0=30"]}, {"position": 15, "layer": 0, "is_head": true, "num": 1, "tags": ["Fail%:1", "Impact:A5", "Math.Sub:M0123", "Math.Neg:N3", "Attn:P1=38", "Attn:P8=31", "Attn:P6=16", "Attn:P14=7"]}, {"position": 15, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:61", "Impact:A5", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P0=61", "Attn:P7=30", "Attn:P14=1"]}, {"position": 15, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:42", "Impact:A5", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1"]}, {"position": 15, "layer": 2, "is_head": true, "num": 1, "tags": ["Fail%:7", "Impact:A5", "Math.Add:S12345", "Math.Sub:M2", "Math.Neg:N1", "Attn:P8=46", "Attn:P9=17", "Attn:P10=8", "Attn:P1=6", "Math.Add:A4.SP"]}, {"position": 15, "layer": 2, "is_head": true, "num": 2, "tags": ["Fail%:0", "Impact:A5", "Math.Sub:M0", "Attn:P15=31", "Attn:P13=10", "Attn:P14=8", "Attn:P6=8"]}, {"position": 15, "layer": 2, "is_head": false, "num": 0, "tags": ["Fail%:20", "Impact:A5", "Math.Add:S012345", "Math.Sub:M123", "Math.Neg:N1"]}, {"position": 16, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:57", "Impact:A4", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P8=99"]}, {"position": 16, "layer": 0, "is_head": true, "num": 1, "tags": ["Fail%:1", "Impact:A4", "Math.Neg:N134", "Attn:P2=37", "Attn:P9=34", "Attn:P6=15", "Attn:P14=9"]}, {"position": 16, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:76", "Impact:A4", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P1=88", "Attn:P8=9"]}, {"position": 16, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:44", "Impact:A4", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N12"]}, {"position": 16, "layer": 1, "is_head": false, "num": 0, "tags": ["Fail%:0", "Impact:A4", "Math.Sub:M0"]}, {"position": 16, "layer": 2, "is_head": true, "num": 1, "tags": ["Fail%:14", "Impact:A4", "Math.Add:S12345", "Math.Sub:M3", "Math.Neg:N1234", "Attn:P9=43", "Attn:P10=22", "Attn:P12=10", "Attn:P11=8", "Math.Add:A3.SP"]}, {"position": 16, "layer": 2, "is_head": true, "num": 3, "tags": ["Fail%:3", "Impact:A4", "Math.Sub:M0123", "Attn:P9=16", "Attn:P16=11", "Attn:P14=9", "Attn:P10=9"]}, {"position": 16, "layer": 2, "is_head": false, "num": 0, "tags": ["Fail%:30", "Impact:A4", "Math.Add:S012345", "Math.Sub:M0123"]}, {"position": 17, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:55", "Impact:A3", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P9=99"]}, {"position": 17, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:77", "Impact:A3", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P2=88", "Attn:P9=9"]}, {"position": 17, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:47", "Impact:A3", "Math.Add:S0123", "Math.Sub:M0123", "Math.Neg:N1234"]}, {"position": 17, "layer": 2, "is_head": true, "num": 1, "tags": ["Fail%:10", "Impact:A3", "Math.Add:S12345", "Math.Sub:M0", "Math.Neg:N123", "Attn:P10=44", "Attn:P11=20", "Attn:P12=19", "Attn:P14=7", "Math.Add:A2.SP"]}, {"position": 17, "layer": 2, "is_head": true, "num": 2, "tags": ["Fail%:0", "Impact:A3", "Math.Sub:M0", "Attn:P17=18", "Attn:P6=12", "Attn:P10=11", "Attn:P13=10"]}, {"position": 17, "layer": 2, "is_head": true, "num": 3, "tags": ["Fail%:5", "Impact:A3", "Math.Sub:M0123", "Attn:P10=15", "Attn:P17=11", "Attn:P14=8", "Attn:P11=8"]}, {"position": 17, "layer": 2, "is_head": false, "num": 0, "tags": ["Fail%:30", "Impact:A3", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N23"]}, {"position": 18, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:66", "Impact:A2", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P10=98"]}, {"position": 18, "layer": 0, "is_head": true, "num": 1, "tags": ["Fail%:0", "Impact:A2", "Math.Neg:N2", "Attn:P4=37", "Attn:P11=36", "Attn:P6=15", "Attn:P14=9"]}, {"position": 18, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:80", "Impact:A2", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P3=88", "Attn:P10=7"]}, {"position": 18, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:54", "Impact:A2", "Math.Add:S01234", "Math.Sub:M0123", "Math.Neg:N1234"]}, {"position": 18, "layer": 1, "is_head": true, "num": 0, "tags": ["Math.Add:A4.SP"]}, {"position": 18, "layer": 1, "is_head": true, "num": 3, "tags": ["Fail%:0", "Impact:A2", "Math.Add:S1", "Math.Sub:M1", "Attn:P11=58", "Attn:P6=14", "Attn:P13=12", "Attn:P10=5"]}, {"position": 18, "layer": 1, "is_head": false, "num": 0, "tags": ["Fail%:0", "Impact:A2", "Math.Add:S2"]}, {"position": 18, "layer": 2, "is_head": true, "num": 1, "tags": ["Fail%:14", "Impact:A2", "Math.Add:S2345", "Math.Sub:M0", "Math.Neg:N1234", "Attn:P11=44", "Attn:P12=39", "Attn:P14=7", "Attn:P5=2", "Math.Add:A1.SP"]}, {"position": 18, "layer": 2, "is_head": true, "num": 3, "tags": ["Fail%:5", "Impact:A2", "Math.Sub:M012", "Attn:P12=15", "Attn:P11=14", "Attn:P18=13", "Attn:P5=9"]}, {"position": 18, "layer": 2, "is_head": false, "num": 0, "tags": ["Fail%:29", "Impact:A2", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N234"]}, {"position": 19, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:79", "Impact:A1", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P11=99"]}, {"position": 19, "layer": 0, "is_head": true, "num": 1, "tags": ["Fail%:0", "Impact:A1", "Math.Neg:N2", "Attn:P12=39", "Attn:P5=32", "Attn:P6=15", "Attn:P14=10"]}, {"position": 19, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:84", "Impact:A1", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P4=94", "Attn:P11=3"]}, {"position": 19, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:60", "Impact:A1", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234"]}, {"position": 19, "layer": 1, "is_head": true, "num": 3, "tags": ["Fail%:0", "Impact:A1", "Math.Add:S3", "Attn:P12=50", "Attn:P6=14", "Attn:P13=10", "Attn:P11=8"]}, {"position": 19, "layer": 2, "is_head": true, "num": 1, "tags": ["Fail%:14", "Impact:A1", "Math.Add:S2345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P12=72", "Attn:P14=8", "Attn:P6=7", "Attn:P1=3"]}, {"position": 19, "layer": 2, "is_head": true, "num": 3, "tags": ["Fail%:4", "Impact:A1", "Math.Sub:M012", "Attn:P19=18", "Attn:P12=18", "Attn:P13=9", "Attn:P5=9"]}, {"position": 19, "layer": 2, "is_head": false, "num": 0, "tags": ["Fail%:35", "Impact:A1", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N234"]}, {"position": 20, "layer": 0, "is_head": true, "num": 0, "tags": ["Fail%:60", "Impact:A0", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P12=88", "Attn:P14=3", "Attn:P5=2", "Attn:P6=1"]}, {"position": 20, "layer": 0, "is_head": true, "num": 3, "tags": ["Fail%:84", "Impact:A0", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234", "Attn:P5=78", "Attn:P12=12"]}, {"position": 20, "layer": 0, "is_head": false, "num": 0, "tags": ["Fail%:55", "Impact:A0", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N1234"]}, {"position": 20, "layer": 1, "is_head": true, "num": 3, "tags": ["Fail%:0", "Impact:A0", "Math.Add:S12", "Attn:P12=27", "Attn:P14=19", "Attn:P6=18", "Attn:P13=14"]}, {"position": 20, "layer": 1, "is_head": false, "num": 0, "tags": ["Fail%:3", "Impact:A0", "Math.Add:S01234"]}, {"position": 20, "layer": 2, "is_head": true, "num": 3, "tags": ["Fail%:4", "Impact:A0", "Math.Sub:M0123", "Attn:P20=12", "Attn:P12=11", "Attn:P14=11", "Attn:P13=9"]}, {"position": 20, "layer": 2, "is_head": false, "num": 0, "tags": ["Fail%:15", "Impact:A0", "Math.Add:S012345", "Math.Sub:M0123", "Math.Neg:N234"]}]
|
features.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"position": 7, "layer": 0, "is_head": true, "num": 0, "tags": ["Algo:A5.ST", "Algo:A5.MT"]}, {"position": 7, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A5.MT", "Algo:OPR", "Algo:A5.GT"]}, {"position": 7, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 8, "layer": 0, "is_head": true, "num": 0, "tags": []}, {"position": 8, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A4.MT", "Algo:A4.GT"]}, {"position": 8, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 8, "layer": 1, "is_head": true, "num": 0, "tags": ["Algo:OPR"]}, {"position": 8, "layer": 1, "is_head": false, "num": 0, "tags": []}, {"position": 9, "layer": 0, "is_head": true, "num": 0, "tags": []}, {"position": 9, "layer": 0, "is_head": true, "num": 1, "tags": ["Algo:OPR"]}, {"position": 9, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A3.MT", "Algo:A3.GT"]}, {"position": 9, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 10, "layer": 0, "is_head": true, "num": 0, "tags": []}, {"position": 10, "layer": 0, "is_head": true, "num": 1, "tags": ["Algo:OPR"]}, {"position": 10, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A2.MT", "Algo:A2.GT"]}, {"position": 10, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 11, "layer": 0, "is_head": true, "num": 0, "tags": []}, {"position": 11, "layer": 0, "is_head": true, "num": 1, "tags": ["Algo:OPR"]}, {"position": 11, "layer": 0, "is_head": true, "num": 3, "tags": []}, {"position": 11, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 11, "layer": 1, "is_head": true, "num": 0, "tags": ["Algo:OPR"]}, {"position": 12, "layer": 0, "is_head": true, "num": 0, "tags": ["Algo:OPR"]}, {"position": 12, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A0.MT", "Algo:A0.GT"]}, {"position": 12, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 12, "layer": 1, "is_head": true, "num": 0, "tags": ["Algo:OPR"]}, {"position": 12, "layer": 1, "is_head": false, "num": 0, "tags": []}, {"position": 13, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:OPR"]}, {"position": 13, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 13, "layer": 1, "is_head": true, "num": 1, "tags": []}, {"position": 14, "layer": 0, "is_head": true, "num": 0, "tags": ["Algo:OPR", "Algo:SGN"]}, {"position": 14, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 14, "layer": 2, "is_head": true, "num": 1, "tags": ["Algo:A4.ST"]}, {"position": 14, "layer": 2, "is_head": false, "num": 0, "tags": []}, {"position": 15, "layer": 0, "is_head": true, "num": 0, "tags": ["Algo:A5.SA", "Algo:A5.MD", "Algo:A5.ND.A5"]}, {"position": 15, "layer": 0, "is_head": true, "num": 1, "tags": ["Algo:OPR", "Algo:SGN"]}, {"position": 15, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A5.SA", "Algo:A5.MD", "Algo:SGN"]}, {"position": 15, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 15, "layer": 2, "is_head": true, "num": 1, "tags": ["Algo:A4.SC", "Algo:A4.MB"]}, {"position": 15, "layer": 2, "is_head": true, "num": 2, "tags": ["Algo:OPR", "Algo:SGN"]}, {"position": 15, "layer": 2, "is_head": false, "num": 0, "tags": []}, {"position": 16, "layer": 0, "is_head": true, "num": 0, "tags": []}, {"position": 16, "layer": 0, "is_head": true, "num": 1, "tags": ["Algo:OPR", "Algo:SGN"]}, {"position": 16, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A4.SA.A4", "Algo:A4.MD.A4", "Algo:A4.ND.A4"]}, {"position": 16, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 16, "layer": 1, "is_head": false, "num": 0, "tags": []}, {"position": 16, "layer": 2, "is_head": true, "num": 1, "tags": []}, {"position": 16, "layer": 2, "is_head": true, "num": 3, "tags": ["Algo:SGN"]}, {"position": 16, "layer": 2, "is_head": false, "num": 0, "tags": []}, {"position": 17, "layer": 0, "is_head": true, "num": 0, "tags": []}, {"position": 17, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A3.SA.A3", "Algo:A3.MD.A3", "Algo:A3.ND.A3"]}, {"position": 17, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 17, "layer": 2, "is_head": true, "num": 1, "tags": ["Algo:SGN"]}, {"position": 17, "layer": 2, "is_head": true, "num": 2, "tags": ["Algo:OPR"]}, {"position": 17, "layer": 2, "is_head": true, "num": 3, "tags": ["Algo:SGN"]}, {"position": 17, "layer": 2, "is_head": false, "num": 0, "tags": []}, {"position": 18, "layer": 0, "is_head": true, "num": 0, "tags": []}, {"position": 18, "layer": 0, "is_head": true, "num": 1, "tags": ["Algo:OPR", "Algo:SGN"]}, {"position": 18, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A2.SA.A2", "Algo:A2.MD.A2", "Algo:A2.ND.A2"]}, {"position": 18, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 18, "layer": 1, "is_head": true, "num": 0, "tags": []}, {"position": 18, "layer": 1, "is_head": true, "num": 3, "tags": ["Algo:OPR"]}, {"position": 18, "layer": 1, "is_head": false, "num": 0, "tags": []}, {"position": 18, "layer": 2, "is_head": true, "num": 1, "tags": ["Algo:A1.SS", "Algo:SGN"]}, {"position": 18, "layer": 2, "is_head": true, "num": 3, "tags": []}, {"position": 18, "layer": 2, "is_head": false, "num": 0, "tags": []}, {"position": 19, "layer": 0, "is_head": true, "num": 0, "tags": []}, {"position": 19, "layer": 0, "is_head": true, "num": 1, "tags": ["Algo:OPR", "Algo:SGN"]}, {"position": 19, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A1.SA.A1", "Algo:A1.MD.A1", "Algo:A1.ND.A1"]}, {"position": 19, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 19, "layer": 1, "is_head": true, "num": 3, "tags": ["Algo:OPR"]}, {"position": 19, "layer": 2, "is_head": true, "num": 1, "tags": ["Algo:OPR", "Algo:SGN"]}, {"position": 19, "layer": 2, "is_head": true, "num": 3, "tags": []}, {"position": 19, "layer": 2, "is_head": false, "num": 0, "tags": []}, {"position": 20, "layer": 0, "is_head": true, "num": 0, "tags": ["Algo:A0.SA", "Algo:A0.MD", "Algo:OPR", "Algo:SGN", "Algo:A0.ND"]}, {"position": 20, "layer": 0, "is_head": true, "num": 3, "tags": ["Algo:A0.SA", "Algo:A0.MD", "Algo:A0.ND"]}, {"position": 20, "layer": 0, "is_head": false, "num": 0, "tags": []}, {"position": 20, "layer": 1, "is_head": true, "num": 3, "tags": ["Algo:OPR", "Algo:SGN"]}, {"position": 20, "layer": 1, "is_head": false, "num": 0, "tags": []}, {"position": 20, "layer": 2, "is_head": true, "num": 3, "tags": ["Algo:SGN"]}, {"position": 20, "layer": 2, "is_head": false, "num": 0, "tags": []}]
|
model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:547986c69e05664cd027543ec8be457b6066819ad97a668c8119a9404ab37102
|
3 |
+
size 41837346
|
training_loss.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|