to_delete / dreamcoder /domains /regex /groundtruthRegexes.py
Fraser-Greenlee
add dreamcoder codebase
e1c1753
raw
history blame
4.25 kB
# Ground-truth ("gt") regexes, keyed by integer id (see `gt_dict`).
#
# The string below is inert text recording six `pre.create(...)` calls, one
# per pattern: ".+", "\d+", "\w+", "\s+", "\u+" and "\l+".
# NOTE(review): `pre` is not defined in this file -- presumably the regex
# library used by the project; confirm before relying on these calls.
#
# It is written as a raw string so that `\d`, `\w`, `\s` and `\l` are not
# parsed as (invalid) Python escape sequences, which newer interpreters warn
# about. The text the string holds is exactly the same as before.
r"""
pre.create(".+"),
pre.create("\d+"),
pre.create("\w+"),
pre.create("\s+"),
pre.create("\u+"),
pre.create("\l+")
"""
# Ground-truth regex for each data column, keyed by an integer id.
# The ids appear to be the column numbers used in task names such as
# "Data column no. 922" -- TODO confirm against the code that reads this table.
#
# Patterns are written as raw strings, so each backslash below is a single
# literal backslash in the stored value. `\u` and `\l` are not Python `re`
# escapes; presumably they are character classes of the project's own regex
# dialect (verify against the regex library in use).
gt_dict = {
    776: r"JPC\u\u\d+\.png",
    922: r"WHS\d_\d+",
    354: r"\u+",
    523: r"(\u)+|\.",
    184: r"\.\d+",
    501: r"u\d\d",
    760: r"\u\u",
    49: r"(\u)+\u\d?",
    732: r"\uR5\d\d",
    450: r"-\d(\.(\d)+)?",
    350: r"\u\u",
    467: r"hu\d(\d|\u)+",
    622: r"A(\d|\u)**",
    476: r"\u+",
    554: r"\u\u",
    940: r"\u\u?",
    496: r"\u\u",
    369: r"\u\u\u",
    596: r"\u+",
    720: r"\(\d\d\d\) \d\d\d-\d\d\d\d",
    53: r"rec-\d\d\d?-(org)|(dup-0)",
    150: r"N\d\d",
    741: r"#\d\d\d",
    18: r"A|C-\d+-\d+",
    589: r"A(\u|\d)++",
    666: r"\(\d\d\d\) \d\d\d-\d\d\d\d",
    581: r"us13\u\d\d",
    299: r"E07000\d\d\d",
    638: r"\l+\d+\l+\d+",
    364: r"\u\u",
    334: r"-00:\d\d:\d\d.\d",
    38: r"SRX89\d+",
    247: r"'\d\d:\d\d:00'",
    506: r"(S|H)\d+",
    891: r"(r|v)\d?",
    911: r"KW-\d+",
    792: r"\d*\u*",
    508: r"N000\d+",
    842: r"-?\d?\d\.\d\d%",
    200: r"\u\u",
    694: r"\(\d+\)",
    210: r"(\d(\.\d)?)|(--)",
    298: r"DS_25(\u|\d)+",
    668: r"\u+",
    939: r"ms0\d+",
    944: r"\u+\d?",
    731: r"ManH.0\d\d",
    229: r"\u+(-\u+)?",
    28: r"Y201\d/\d\d\d\d",
    374: r"q000\d(_000\d)?",
    819: r"\d*\l*\d*",
    516: r"-122.3\d+",
    417: r"\u\uT\uB",
    660: r"ENGL?\d\d\d",
    585: r"M?\u+",
    325: r"BUS M \d\d\d.*",
    823: r"\u\u\u",
    515: r"L|\u - (\?\?)|(\d?\d\.\d lbs\.)",
    864: r"\u+",
    359: r"MAM\.OSBS\.201\d\.\d\d",
    594: r"(\u|\d)+( (\u|\d)+)*",
    788: r"-\d(,\d+)?",
    188: r"cat\. \d\d",
    355: r".+",
    799: r"\u\d\d",
    902: r"\u\d\d",
    920: r"A\.\d\d",
    330: r"Resp\d\d",
    396: r"\u+(( |/)\u+)?",
    393: r"US $ \d\.\d\d",
    680: r"Z:-?0\.\d\d",
    744: r"t1_cv(\l|\d)+",
    461: r"(\u|\l)+\d+",
    631: r"$\d+\.\d+",
    195: r"(OLE)?\d+",
    693: r"\u",
    577: r"EFO_000\d+",
    392: r"$\d+(,\d\d\d)*\.00",
    688: r"\u+( \u+)*",
    816: r"\u\u\u",
    489: r"UK\u\d",
    251: r"\l\l\l",
    653: r"C\d+",
    769: r"(\u|\l|\d|-)+\d+",
    991: r"Q\d-201\d",
    342: r"\u\u\d\d\d\d",
    308: r"\u\u\u\u",
    136: r"IMPC_\u\u\u_\d\d\d_\d\d\d",
    327: r"#\d+((/|-)\d+)*",
    981: r"\u\u\u",
    892: r"(.|\l)*",
    375: r"P\u\.\d\d\d\d\.\d\d\d",
    499: r"A000\d+",
    474: r"\u+",
    50: r"V06\d+",
    381: r"F?\d+",
    883: r"-79.\d+",
    173: r"(\u|\l)+\d+",
    147: r"\u\u\u-\u\u\u",
    419: r"\u\u",
    961: r"-?\d\.\d*",
    148: r"Q\d\d",
    975: r"(\d|\u)+",
    79: r"\d+(,\d\d\d)+",
    775: r"\u\l\l \d+ \d\d\d\d",
    774: r"FOS\d\d+",
    561: r".+",
    509: r"S000\d+",
    494: r"S1900\d+",
    119: r"$\d\d(,\d\d\d)+",
    29: r"(\u|\l|\d)+",
    121: r"(\d|\u|\.|/|\(|\))+",
    61: r"R \d\d\d.\d\d",
    871: r"-0.7\d+",
    639: r"\u+?\d+",
    729: r"COMISARIA \d\d",
    193: r"\u\d\d",
    752: r"(.*|\u\.?)+",
    17: r"$\d.\d\d",
    914: r"R\d\d\d\d",
    510: r"P\d000\d\d\d\d",
    443: r"(W|L) \d-\d+",
    20: r"MDEL\d\d?\.\d\l",
    64: r"c04p0100(\l|\d)",
    301: r"(\u|\d)+(-(\u|\d)+)*",
    664: r"N\d",
    493: r"[0\.0\d+]",
    765: r"-?\d\.\d+( \(0\.\d+\))?",
}
# Set of task names of the form "Data column no. <N>".
# NOTE(review): the variable name suggests these are tasks whose regex is
# considered bad; the criterion is not visible in this file -- confirm with
# the code that consumes this set.
badRegexTasks = {
    "Data column no. {}".format(column_number)
    for column_number in (
        922, 184, 467, 476, 150, 299, 334, 493, 891,
        792, 765, 944, 374, 660, 188, 920, 330, 396,
        680, 769, 308, 375, 474, 79, 871, 729, 664,
    )
}