luisoala committed on
Commit
7c8fccb
·
1 Parent(s): d592b82

async test

Browse files
Files changed (3) hide show
  1. app.py +23 -16
  2. requirements.txt +1 -2
  3. validation.py +12 -10
app.py CHANGED
@@ -4,26 +4,27 @@ import time
4
  import traceback
5
  from validation import validate_json, validate_croissant, validate_records
6
  import requests
 
7
 
8
- def process_file(file):
9
  results = []
10
 
11
  # Check 1: JSON validation
12
- json_valid, json_message, json_data = validate_json(file.name)
13
  results.append(("JSON Format Validation", json_valid, json_message))
14
 
15
  if not json_valid:
16
  return results
17
 
18
  # Check 2: Croissant validation
19
- croissant_valid, croissant_message = validate_croissant(json_data)
20
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
21
 
22
  if not croissant_valid:
23
  return results
24
 
25
  # Check 3: Records validation
26
- records_valid, records_message = validate_records(json_data)
27
  results.append(("Records Generation Test", records_valid, records_message))
28
 
29
  return results
@@ -181,15 +182,16 @@ def create_ui():
181
 
182
  return """<div class="progress-status">✅ File uploaded successfully</div>""", gr.update(visible=False)
183
 
184
- def fetch_from_url(url):
185
  if not url:
186
  return """<div class="progress-status">Please enter a URL</div>""", gr.update(visible=False)
187
 
188
  try:
189
  # Fetch JSON from URL
190
- response = requests.get(url, timeout=10)
191
- response.raise_for_status()
192
- json_data = response.json()
 
193
 
194
  # Show success message
195
  progress_html = """<div class="progress-status">✅ JSON fetched successfully from URL</div>"""
@@ -198,18 +200,18 @@ def create_ui():
198
  results = []
199
  results.append(("JSON Format Validation", True, "✅ The URL returned valid JSON."))
200
 
201
- croissant_valid, croissant_message = validate_croissant(json_data)
202
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
203
 
204
  if not croissant_valid:
205
  return progress_html, build_results_html(results)
206
 
207
- records_valid, records_message = validate_records(json_data)
208
  results.append(("Records Generation Test", records_valid, records_message))
209
 
210
  return progress_html, build_results_html(results)
211
 
212
- except requests.exceptions.RequestException as e:
213
  error_message = f"❌ Error fetching URL: {str(e)}"
214
  return f"""<div class="progress-status">{error_message}</div>""", gr.update(visible=False)
215
  except json.JSONDecodeError as e:
@@ -254,19 +256,19 @@ def create_ui():
254
  html += '</div>'
255
  return gr.update(value=html, visible=True)
256
 
257
- def on_validate(file):
258
  if file is None:
259
  return gr.update(visible=False)
260
 
261
  # Process the file and get results
262
- results = process_file(file)
263
  return build_results_html(results)
264
 
265
  # Connect UI events to functions
266
  tabs.select(on_tab_change, None, [active_tab, upload_progress, validation_results])
267
  file_input.change(on_file_upload, inputs=file_input, outputs=[upload_progress, validation_results])
268
- validate_btn.click(on_validate, inputs=file_input, outputs=validation_results)
269
- fetch_btn.click(fetch_from_url, inputs=url_input, outputs=[upload_progress, validation_results])
270
 
271
  # Footer
272
  gr.HTML("""
@@ -279,4 +281,9 @@ def create_ui():
279
 
280
  if __name__ == "__main__":
281
  app = create_ui()
282
- app.launch()
 
 
 
 
 
 
4
  import traceback
5
  from validation import validate_json, validate_croissant, validate_records
6
  import requests
7
+ import aiohttp
8
 
9
+ async def process_file(file):
10
  results = []
11
 
12
  # Check 1: JSON validation
13
+ json_valid, json_message, json_data = await validate_json(file.name)
14
  results.append(("JSON Format Validation", json_valid, json_message))
15
 
16
  if not json_valid:
17
  return results
18
 
19
  # Check 2: Croissant validation
20
+ croissant_valid, croissant_message = await validate_croissant(json_data)
21
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
22
 
23
  if not croissant_valid:
24
  return results
25
 
26
  # Check 3: Records validation
27
+ records_valid, records_message = await validate_records(json_data)
28
  results.append(("Records Generation Test", records_valid, records_message))
29
 
30
  return results
 
182
 
183
  return """<div class="progress-status">✅ File uploaded successfully</div>""", gr.update(visible=False)
184
 
185
+ async def fetch_from_url(url):
186
  if not url:
187
  return """<div class="progress-status">Please enter a URL</div>""", gr.update(visible=False)
188
 
189
  try:
190
  # Fetch JSON from URL
191
+ async with aiohttp.ClientSession() as session:
192
+ async with session.get(url, timeout=10) as response:
193
+ response.raise_for_status()
194
+ json_data = await response.json()
195
 
196
  # Show success message
197
  progress_html = """<div class="progress-status">✅ JSON fetched successfully from URL</div>"""
 
200
  results = []
201
  results.append(("JSON Format Validation", True, "✅ The URL returned valid JSON."))
202
 
203
+ croissant_valid, croissant_message = await validate_croissant(json_data)
204
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
205
 
206
  if not croissant_valid:
207
  return progress_html, build_results_html(results)
208
 
209
+ records_valid, records_message = await validate_records(json_data)
210
  results.append(("Records Generation Test", records_valid, records_message))
211
 
212
  return progress_html, build_results_html(results)
213
 
214
+ except aiohttp.ClientError as e:
215
  error_message = f"❌ Error fetching URL: {str(e)}"
216
  return f"""<div class="progress-status">{error_message}</div>""", gr.update(visible=False)
217
  except json.JSONDecodeError as e:
 
256
  html += '</div>'
257
  return gr.update(value=html, visible=True)
258
 
259
+ async def on_validate(file):
260
  if file is None:
261
  return gr.update(visible=False)
262
 
263
  # Process the file and get results
264
+ results = await process_file(file)
265
  return build_results_html(results)
266
 
267
  # Connect UI events to functions
268
  tabs.select(on_tab_change, None, [active_tab, upload_progress, validation_results])
269
  file_input.change(on_file_upload, inputs=file_input, outputs=[upload_progress, validation_results])
270
+ validate_btn.click(fn=on_validate, inputs=file_input, outputs=validation_results)
271
+ fetch_btn.click(fn=fetch_from_url, inputs=url_input, outputs=[upload_progress, validation_results])
272
 
273
  # Footer
274
  gr.HTML("""
 
281
 
282
  if __name__ == "__main__":
283
  app = create_ui()
284
+ app.launch(
285
+ max_threads=1, # Reduce thread conflicts
286
+ show_error=True,
287
+ queue_timeout=300, # Match our 5-minute timeout
288
+ share=False
289
+ )
requirements.txt CHANGED
@@ -4,8 +4,7 @@ gradio>=3.50.2
4
  func_timeout
5
  requests
6
  huggingface-hub==0.30.1
7
- fsspec==2025.3.2
8
- gcsfs==2025.3.2
9
  aiohttp==3.11.15
10
  aiohappyeyeballs==2.6.1
11
  pandas==2.2.2
 
4
  func_timeout
5
  requests
6
  huggingface-hub==0.30.1
7
+ fsspec==2023.10.0
 
8
  aiohttp==3.11.15
9
  aiohappyeyeballs==2.6.1
10
  pandas==2.2.2
validation.py CHANGED
@@ -1,11 +1,12 @@
1
  import json
2
  import mlcroissant as mlc
3
  import traceback
4
- import func_timeout
 
5
 
6
  WAIT_TIME = 5 * 60 # seconds
7
 
8
- def validate_json(file_path):
9
  """Validate that the file is proper JSON."""
10
  try:
11
  with open(file_path, 'r') as f:
@@ -18,7 +19,7 @@ def validate_json(file_path):
18
  error_message = f"❌ Error reading file: {str(e)}"
19
  return False, error_message, None
20
 
21
- def validate_croissant(json_data):
22
  """Validate that the JSON follows Croissant schema."""
23
  try:
24
  dataset = mlc.Dataset(jsonld=json_data)
@@ -32,7 +33,7 @@ def validate_croissant(json_data):
32
  error_message = f"❌ Unexpected error during validation: {str(e)}\n\n{error_details}"
33
  return False, error_message
34
 
35
- def validate_records(json_data):
36
  """Validate that records can be generated within the time limit."""
37
  try:
38
  dataset = mlc.Dataset(jsonld=json_data)
@@ -45,12 +46,13 @@ def validate_records(json_data):
45
 
46
  for record_set in record_sets:
47
  try:
48
- records = dataset.records(record_set=record_set.uuid)
49
- print(records)
50
- _ = func_timeout.func_timeout(300, lambda: next(iter(records)))
51
- results.append(f"✅ Record set '{record_set.uuid}' passed validation.")
52
- except func_timeout.exceptions.FunctionTimedOut:
53
- error_message = f"❌ Record set '{record_set.uuid}' generation took too long (>300s)"
 
54
  return False, error_message
55
  except Exception as e:
56
  error_details = traceback.format_exc()
 
1
  import json
2
  import mlcroissant as mlc
3
  import traceback
4
+ import asyncio
5
+ from asyncio import timeout
6
 
7
  WAIT_TIME = 5 * 60 # seconds
8
 
9
+ async def validate_json(file_path):
10
  """Validate that the file is proper JSON."""
11
  try:
12
  with open(file_path, 'r') as f:
 
19
  error_message = f"❌ Error reading file: {str(e)}"
20
  return False, error_message, None
21
 
22
+ async def validate_croissant(json_data):
23
  """Validate that the JSON follows Croissant schema."""
24
  try:
25
  dataset = mlc.Dataset(jsonld=json_data)
 
33
  error_message = f"❌ Unexpected error during validation: {str(e)}\n\n{error_details}"
34
  return False, error_message
35
 
36
+ async def validate_records(json_data):
37
  """Validate that records can be generated within the time limit."""
38
  try:
39
  dataset = mlc.Dataset(jsonld=json_data)
 
46
 
47
  for record_set in record_sets:
48
  try:
49
+ async with timeout(WAIT_TIME):
50
+ records = dataset.records(record_set=record_set.uuid)
51
+ print(records)
52
+ _ = next(iter(records))
53
+ results.append(f"✅ Record set '{record_set.uuid}' passed validation.")
54
+ except asyncio.TimeoutError:
55
+ error_message = f"❌ Record set '{record_set.uuid}' generation took too long (>{WAIT_TIME}s)"
56
  return False, error_message
57
  except Exception as e:
58
  error_details = traceback.format_exc()