Forrest99 committed on
Commit
144fc9e
·
verified ·
1 Parent(s): ff162ee

Create validate_data.py

Browse files
Files changed (1) hide show
  1. validate_data.py +45 -0
validate_data.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def validate_inputs(snippets, *, min_length=10):
    """Validate and clean a batch of code snippets, printing a short report.

    Each item is processed as follows:

    1. A non-``str`` item is converted to text and a conversion notice is
       recorded. ``bytes``/``bytearray`` values are decoded as UTF-8 (with
       undecodable bytes replaced) rather than passed through ``str()``,
       which would yield the ``b'...'`` repr instead of the content.
    2. Every ``"..."`` sequence is removed and surrounding whitespace is
       stripped.
    3. An item whose cleaned text is shorter than ``min_length`` characters
       is rejected and a rejection message is recorded.

    A summary is printed to stdout: the number of accepted snippets, the
    number of recorded messages, and the first three messages. Note that the
    message list holds both conversion notices and rejections, so a converted
    item that passes the length check is counted in both totals.

    Args:
        snippets: Iterable of candidate snippets. ``None`` is treated as an
            empty batch.
        min_length: Minimum number of characters a cleaned snippet must have
            to be accepted. Defaults to 10.

    Returns:
        A list of the cleaned snippets that passed validation, in input order.
    """
    # Treat a missing batch as empty instead of failing inside enumerate().
    if snippets is None:
        snippets = []

    errors = []
    valid_data = []

    for idx, s in enumerate(snippets):
        # Type check: coerce anything that is not already text.
        if not isinstance(s, str):
            errors.append(f"索引 {idx}: 类型错误 ({type(s)}) → 已转换")
            if isinstance(s, (bytes, bytearray)):
                # str(b"...") would return the repr ("b'...'"); decode to get
                # the actual text content.
                s = s.decode("utf-8", errors="replace")
            else:
                s = str(s)

        # Clean-up: drop "..." sequences and surrounding whitespace.
        cleaned = s.replace("...", "").strip()

        # Validity check: reject snippets below the minimum length.
        if len(cleaned) < min_length:
            errors.append(f"索引 {idx}: 代码过短 ({len(cleaned)} 字符)")
            continue

        valid_data.append(cleaned)

    # Report: totals first, then at most the first three messages.
    print(f"验证结果:\n成功 {len(valid_data)} 条\n错误 {len(errors)} 条")
    for err in errors[:3]:
        print(f" - {err}")
    if len(errors) > 3:
        print(f" ...(共 {len(errors)} 个错误)")

    return valid_data
30
+
31
if __name__ == "__main__":
    # Demo input covering one case of each kind the validator handles.
    samples = [
        "def example(): pass",
        12345,  # wrong type (not a string)
        "print(...)",  # contains "..."
        " ",  # whitespace only, empty once stripped
        """def valid_func():
return '正确代码'""",
    ]

    accepted = validate_inputs(samples)

    # Show a preview (first 50 characters) of up to two accepted snippets.
    print("\n有效数据示例:")
    preview = accepted[:2]
    for position, snippet in enumerate(preview):
        head = snippet[:50]
        print(f"[{position}] {head}...")