luisoala committed on
Commit
d5f5654
·
1 Parent(s): 842b607

basic checker

Browse files
Files changed (3) hide show
  1. app.py +178 -0
  2. requirements.txt +3 -0
  3. validation.py +63 -0
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import time
4
+ import traceback
5
+ from validation import validate_json, validate_croissant, validate_records
6
+
7
def process_file(file):
    """Run the validation pipeline on an uploaded file.

    The checks run in order (JSON parsing, Croissant schema, record
    generation) and the pipeline stops after the first check that fails.

    Args:
        file: Uploaded file object; its ``name`` attribute is used as the
            path to read.

    Returns:
        A list of ``(check_title, passed, message)`` tuples, one per check
        that was actually executed.
    """
    report = []

    # Check 1: the file must parse as JSON before anything else can run.
    ok, detail, parsed = validate_json(file.name)
    report.append(("JSON Format Validation", ok, detail))

    if ok:
        # Check 2: the parsed document must satisfy the Croissant schema.
        ok, detail = validate_croissant(parsed)
        report.append(("Croissant Schema Validation", ok, detail))

        if ok:
            # Check 3: records must be producible from the dataset.
            ok, detail = validate_records(parsed)
            report.append(("Records Generation Test", ok, detail))

    return report
29
+
30
def _render_results(results):
    """Render validation results as a self-contained HTML fragment.

    Args:
        results: Iterable of ``(test_name, passed, message)`` tuples.

    Returns:
        HTML markup containing one collapsible ``<details>`` element per
        check. No JavaScript is required to expand or collapse a step.
    """
    # Imported locally so the module-level import block stays untouched.
    from html import escape

    steps = []
    for i, (test_name, passed, message) in enumerate(results):
        status_class = "status-success" if passed else "status-error"
        status_icon = "✓" if passed else "✗"
        # Messages can contain traceback text (with frames such as
        # "<lambda>") and values taken from the uploaded file, so both the
        # title and the message are escaped before being placed in markup.
        steps.append(
            f'<details class="validation-step" id="step-{i}">'
            f'<summary class="step-header">'
            f'<span class="step-status {status_class}">{status_icon}</span>'
            f'<span class="step-title">{escape(str(test_name))}</span>'
            f'</summary>'
            # No whitespace around the message: .step-details uses
            # "white-space: pre-wrap", so padding would be rendered.
            f'<div class="step-details" id="details-{i}">'
            f'{escape(str(message))}</div>'
            f'</details>'
        )

    return '<div class="validation-results">' + "".join(steps) + '</div>'


def create_ui():
    """Build the Gradio Blocks interface for the Croissant validator.

    The page has a file upload, an upload status banner, a "Validate"
    button, and a results area that is hidden until a validation has run.

    Returns:
        The configured ``gr.Blocks`` application. Nothing is launched here.
    """

    def status_html(text):
        # Single source for the upload status banner markup.
        return (
            '<div class="progress-container">'
            f'<div class="progress-status">{text}</div>'
            '</div>'
        )

    with gr.Blocks(theme=gr.themes.Soft()) as app:
        gr.Markdown("# Croissant JSON-LD Validator for NeurIPS")
        gr.Markdown(
            "\n"
            "Upload your Croissant JSON-LD file to validate if it meets the "
            "requirements for NeurIPS submission.\n"
            "The validator will check:\n"
            "1. If the file is valid JSON\n"
            "2. If it passes Croissant schema validation\n"
            "3. If records can be generated within a reasonable time\n"
        )

        with gr.Row():
            file_input = gr.File(
                label="Upload Croissant JSON-LD File",
                file_types=[".json", ".jsonld"],
            )

        upload_progress = gr.HTML(status_html("Ready for upload"), visible=True)

        validate_btn = gr.Button("Validate", variant="primary")

        # Container for the rendered validation steps.
        validation_results = gr.HTML(visible=False)

        # CSS for the validation UI.
        gr.HTML("""
        <style>
        .validation-step {
            margin-bottom: 15px;
            border: 1px solid #e0e0e0;
            border-radius: 8px;
            overflow: hidden;
        }
        .step-header {
            padding: 10px 15px;
            background-color: #f5f5f5;
            display: flex;
            align-items: center;
            cursor: pointer;
            list-style: none;
        }
        .step-header::-webkit-details-marker {
            display: none;
        }
        .step-status {
            margin-right: 10px;
            width: 24px;
            height: 24px;
            border-radius: 50%;
            display: flex;
            align-items: center;
            justify-content: center;
            font-weight: bold;
            color: white;
        }
        .status-waiting {
            background-color: #9e9e9e;
        }
        .status-success {
            background-color: #4caf50;
        }
        .status-error {
            background-color: #f44336;
        }
        .step-title {
            font-weight: 500;
        }
        .step-details {
            padding: 15px;
            background-color: #fafafa;
            border-top: 1px solid #e0e0e0;
            white-space: pre-wrap;
            font-family: monospace;
            max-height: 300px;
            overflow-y: auto;
        }
        .progress-container {
            margin: 10px 0;
            font-weight: 500;
            text-align: center;
        }
        </style>
        """)

        def on_file_upload(file):
            # The change event also fires when the file is cleared.
            if file is None:
                return status_html("Ready for upload")
            return status_html("✅ File uploaded successfully")

        def on_validate(file):
            # Nothing to validate: keep the results area hidden.
            if file is None:
                return gr.update(visible=False)

            markup = _render_results(process_file(file))
            return gr.update(value=markup, visible=True)

        file_input.change(on_file_upload, inputs=file_input, outputs=upload_progress)
        validate_btn.click(on_validate, inputs=file_input, outputs=validation_results)

        gr.HTML("""
        <div style="text-align: center; margin-top: 20px;">
            <p>Based on the <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant standard</a> from MLCommons.</p>
        </div>
        """)

    return app
175
+
176
if __name__ == "__main__":
    # Build the interface and serve it only when executed as a script,
    # so importing this module has no side effects.
    app = create_ui()
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=3.50.2
2
+ mlcroissant
3
+ func_timeout
validation.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import traceback
3
+ import mlcroissant as mlc
4
+ import func_timeout
5
+
6
# Time limit, in seconds, passed to func_timeout when fetching the first
# record of a record set.
ONE_MINUTE = 60
7
+
8
def validate_json(file_path):
    """Check that a file contains well-formed JSON and parse it.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``(is_valid, message, data)`` tuple. ``data`` is the parsed JSON
        value on success and ``None`` on any failure. This function does not
        raise; every error is reported through the returned message.
    """
    try:
        # "utf-8-sig" reads plain UTF-8 and also strips a leading byte-order
        # mark, which the json module otherwise rejects. Pinning the encoding
        # also avoids depending on the platform default.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            json_data = json.load(f)
    except json.JSONDecodeError as e:
        return False, f"❌ Invalid JSON format: {e}", None
    except UnicodeDecodeError as e:
        return False, f"❌ Error reading file: not valid UTF-8 text ({e})", None
    except Exception as e:
        # Boundary catch-all (missing file, permissions, ...): the caller
        # shows the message to the user instead of crashing.
        return False, f"❌ Error reading file: {e}", None

    return True, "✅ The file is valid JSON.", json_data
20
+
21
def validate_croissant(json_data):
    """Check that parsed JSON-LD can be loaded as a Croissant dataset.

    Args:
        json_data: Parsed JSON value to hand to ``mlc.Dataset``.

    Returns:
        A ``(is_valid, message)`` tuple. On failure the message contains the
        exception text followed by the formatted traceback. This function
        does not raise.
    """
    try:
        # Only the act of constructing the dataset matters here; any
        # exception it raises is turned into a failure message below.
        mlc.Dataset(jsonld=json_data)
    except mlc.ValidationError as e:
        error_details = traceback.format_exc()
        return False, f"❌ Validation failed: {e}\n\n{error_details}"
    except Exception as e:
        error_details = traceback.format_exc()
        return False, f"❌ Unexpected error during validation: {e}\n\n{error_details}"

    return True, "✅ The dataset passes Croissant validation."
34
+
35
def validate_records(json_data):
    """Check that a first record can be produced for every record set.

    For each record set in the dataset metadata, the first record is
    requested under a time limit of ``ONE_MINUTE`` seconds. Validation stops
    at the first record set that times out, raises, or yields no record.

    Args:
        json_data: Parsed JSON value to hand to ``mlc.Dataset``.

    Returns:
        A ``(is_valid, message)`` tuple. On success the message has one line
        per record set. This function does not raise.
    """
    try:
        dataset = mlc.Dataset(jsonld=json_data)
        record_sets = dataset.metadata.record_sets

        if not record_sets:
            return True, "✅ No record sets found to validate."

        # Sentinel marking "the iterator was empty". Without it, next()
        # raises StopIteration, whose str() is empty, so the user would see
        # a failure with no stated reason.
        no_record = object()
        results = []

        for record_set in record_sets:
            name = record_set.name
            try:
                records = dataset.records(record_set=name)
                first = func_timeout.func_timeout(
                    ONE_MINUTE, lambda: next(iter(records), no_record)
                )
            except func_timeout.exceptions.FunctionTimedOut:
                # Caught by name because the broad handler below would not
                # describe it as a timeout.
                return False, (
                    f"❌ Record set '{name}' generation took too long "
                    f"(>{ONE_MINUTE}s)"
                )
            except Exception as e:
                error_details = traceback.format_exc()
                return False, f"❌ Record set '{name}' failed: {e}\n\n{error_details}"

            if first is no_record:
                return False, f"❌ Record set '{name}' failed: no records were generated."

            results.append(f"✅ Record set '{name}' passed validation.")

        return True, "\n".join(results)
    except Exception as e:
        error_details = traceback.format_exc()
        return False, (
            f"❌ Unexpected error during records validation: {e}\n\n{error_details}"
        )