luisoala committed on
Commit
8287176
·
1 Parent(s): ef14ba1

Merge working-april-04 validation changes

Browse files
Files changed (1) hide show
  1. validation.py +11 -13
validation.py CHANGED
@@ -1,12 +1,11 @@
1
  import json
2
- import mlcroissant as mlc
3
  import traceback
4
- import asyncio
5
- from asyncio import timeout
6
 
7
  WAIT_TIME = 5 * 60 # seconds
8
 
9
- async def validate_json(file_path):
10
  """Validate that the file is proper JSON."""
11
  try:
12
  with open(file_path, 'r') as f:
@@ -19,7 +18,7 @@ async def validate_json(file_path):
19
  error_message = f"❌ Error reading file: {str(e)}"
20
  return False, error_message, None
21
 
22
- async def validate_croissant(json_data):
23
  """Validate that the JSON follows Croissant schema."""
24
  try:
25
  dataset = mlc.Dataset(jsonld=json_data)
@@ -33,7 +32,7 @@ async def validate_croissant(json_data):
33
  error_message = f"❌ Unexpected error during validation: {str(e)}\n\n{error_details}"
34
  return False, error_message
35
 
36
- async def validate_records(json_data):
37
  """Validate that records can be generated within the time limit."""
38
  try:
39
  dataset = mlc.Dataset(jsonld=json_data)
@@ -46,13 +45,12 @@ async def validate_records(json_data):
46
 
47
  for record_set in record_sets:
48
  try:
49
- async with timeout(WAIT_TIME):
50
- records = dataset.records(record_set=record_set.uuid)
51
- print(records)
52
- _ = next(iter(records))
53
- results.append(f"✅ Record set '{record_set.uuid}' passed validation.")
54
- except asyncio.TimeoutError:
55
- error_message = f"❌ Record set '{record_set.uuid}' generation took too long (>{WAIT_TIME}s)"
56
  return False, error_message
57
  except Exception as e:
58
  error_details = traceback.format_exc()
 
1
  import json
 
2
  import traceback
3
+ import mlcroissant as mlc
4
+ import func_timeout
5
 
6
  WAIT_TIME = 5 * 60 # seconds
7
 
8
+ def validate_json(file_path):
9
  """Validate that the file is proper JSON."""
10
  try:
11
  with open(file_path, 'r') as f:
 
18
  error_message = f"❌ Error reading file: {str(e)}"
19
  return False, error_message, None
20
 
21
+ def validate_croissant(json_data):
22
  """Validate that the JSON follows Croissant schema."""
23
  try:
24
  dataset = mlc.Dataset(jsonld=json_data)
 
32
  error_message = f"❌ Unexpected error during validation: {str(e)}\n\n{error_details}"
33
  return False, error_message
34
 
35
+ def validate_records(json_data):
36
  """Validate that records can be generated within the time limit."""
37
  try:
38
  dataset = mlc.Dataset(jsonld=json_data)
 
45
 
46
  for record_set in record_sets:
47
  try:
48
+ records = dataset.records(record_set=record_set.uuid)
49
+ print(records)
50
+ _ = func_timeout.func_timeout(WAIT_TIME, lambda: next(iter(records)))
51
+ results.append(f"✅ Record set '{record_set.uuid}' passed validation.")
52
+ except func_timeout.exceptions.FunctionTimedOut:
53
+ error_message = f"❌ Record set '{record_set.uuid}' generation took too long (>300s)"
 
54
  return False, error_message
55
  except Exception as e:
56
  error_details = traceback.format_exc()