luisoala committed on
Commit
f51aacc
·
1 Parent(s): e85412e

url ingest

Browse files
Files changed (2) hide show
  1. app.py +82 -18
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,6 +3,7 @@ import json
3
  import time
4
  import traceback
5
  from validation import validate_json, validate_croissant, validate_records
 
6
 
7
  def process_file(file):
8
  results = []
@@ -31,24 +32,32 @@ def create_ui():
31
  with gr.Blocks(theme=gr.themes.Soft()) as app:
32
  gr.Markdown("# Croissant JSON-LD Validator for NeurIPS")
33
  gr.Markdown("""
34
- Upload your Croissant JSON-LD file to validate if it meets the requirements for NeurIPS submission.
35
  The validator will check:
36
  1. If the file is valid JSON
37
  2. If it passes Croissant schema validation
38
  3. If records can be generated within a reasonable time
39
  """)
40
 
41
- with gr.Row():
42
- file_input = gr.File(label="Upload Croissant JSON-LD File", file_types=[".json", ".jsonld"])
 
 
 
 
 
 
 
 
43
 
44
  upload_progress = gr.HTML(
45
  """<div class="progress-container">
46
- <div class="progress-status">Ready for upload</div>
47
  </div>""", visible=True)
48
 
49
- validate_btn = gr.Button("Validate", variant="primary")
50
 
51
- # Create containers for each validation step
52
  validation_results = gr.HTML(visible=False)
53
 
54
  # Define CSS for the validation UI
@@ -119,22 +128,68 @@ def create_ui():
119
  <div class="progress-status">✅ File uploaded successfully</div>
120
  </div>""", gr.update(visible=False)
121
 
122
- def on_validate(file):
123
- if file is None:
124
- return gr.update(visible=False)
125
-
126
- # Process the file and get results
127
- results = process_file(file)
128
 
129
- # Build the HTML for validation results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  html = '<div class="validation-results">'
131
 
132
  for i, (test_name, passed, message) in enumerate(results):
133
- # Determine status class
134
  status_class = "status-success" if passed else "status-error"
135
  status_icon = "✓" if passed else "✗"
136
 
137
- # Use direct inline JavaScript manipulation instead of a function call
138
  html += f'''
139
  <div class="validation-step" id="step-{i}">
140
  <div class="step-header" onclick="
@@ -160,19 +215,28 @@ def create_ui():
160
  '''
161
 
162
  html += '</div>'
163
-
164
- # No separate JavaScript function needed
165
  return gr.update(value=html, visible=True)
166
 
 
 
 
 
 
 
 
 
 
167
  file_input.change(on_file_upload, inputs=file_input, outputs=[upload_progress, validation_results])
168
  validate_btn.click(on_validate, inputs=file_input, outputs=validation_results)
 
169
 
 
170
  gr.HTML("""
171
  <div style="text-align: center; margin-top: 20px;">
172
  <p>Based on the <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant format</a> from MLCommons.</p>
173
  </div>
174
  """)
175
-
176
  return app
177
 
178
  if __name__ == "__main__":
 
3
  import time
4
  import traceback
5
  from validation import validate_json, validate_croissant, validate_records
6
+ import requests
7
 
8
  def process_file(file):
9
  results = []
 
32
  with gr.Blocks(theme=gr.themes.Soft()) as app:
33
  gr.Markdown("# Croissant JSON-LD Validator for NeurIPS")
34
  gr.Markdown("""
35
+ Upload your Croissant JSON-LD file or enter a URL to validate if it meets the requirements for NeurIPS submission.
36
  The validator will check:
37
  1. If the file is valid JSON
38
  2. If it passes Croissant schema validation
39
  3. If records can be generated within a reasonable time
40
  """)
41
 
42
+ with gr.Tabs() as tabs:
43
+ with gr.TabItem("Upload File"):
44
+ file_input = gr.File(label="Upload Croissant JSON-LD File", file_types=[".json", ".jsonld"])
45
+
46
+ with gr.TabItem("URL Input"):
47
+ url_input = gr.Textbox(
48
+ label="Enter Croissant JSON-LD URL",
49
+ placeholder="https://huggingface.co/api/datasets/facebook/natural_reasoning/croissant"
50
+ )
51
+ fetch_btn = gr.Button("Fetch and Validate", variant="primary")
52
 
53
  upload_progress = gr.HTML(
54
  """<div class="progress-container">
55
+ <div class="progress-status">Ready for validation</div>
56
  </div>""", visible=True)
57
 
58
+ validate_btn = gr.Button("Validate Uploaded File", variant="primary")
59
 
60
+ # Create containers for validation results
61
  validation_results = gr.HTML(visible=False)
62
 
63
  # Define CSS for the validation UI
 
128
  <div class="progress-status">✅ File uploaded successfully</div>
129
  </div>""", gr.update(visible=False)
130
 
131
+ def fetch_from_url(url):
132
+ if not url:
133
+ return """<div class="progress-container">
134
+ <div class="progress-status">Please enter a URL</div>
135
+ </div>""", gr.update(visible=False)
 
136
 
137
+ try:
138
+ # Fetch JSON from URL
139
+ response = requests.get(url, timeout=10)
140
+ response.raise_for_status() # Raise exception for 4XX/5XX status codes
141
+
142
+ # Try to parse as JSON
143
+ json_data = response.json()
144
+
145
+ # Show success message
146
+ progress_html = """<div class="progress-container">
147
+ <div class="progress-status">✅ JSON fetched successfully from URL</div>
148
+ </div>"""
149
+
150
+ # Validate the fetched JSON
151
+ results = []
152
+
153
+ # Check 1: JSON validation (already done by parsing)
154
+ results.append(("JSON Format Validation", True, "✅ The URL returned valid JSON."))
155
+
156
+ # Check 2: Croissant validation
157
+ croissant_valid, croissant_message = validate_croissant(json_data)
158
+ results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
159
+
160
+ if not croissant_valid:
161
+ return progress_html, build_results_html(results)
162
+
163
+ # Check 3: Records validation
164
+ records_valid, records_message = validate_records(json_data)
165
+ results.append(("Records Generation Test", records_valid, records_message))
166
+
167
+ return progress_html, build_results_html(results)
168
+
169
+ except requests.exceptions.RequestException as e:
170
+ error_message = f"❌ Error fetching URL: {str(e)}"
171
+ return f"""<div class="progress-container">
172
+ <div class="progress-status">{error_message}</div>
173
+ </div>""", gr.update(visible=False)
174
+ except json.JSONDecodeError as e:
175
+ error_message = f"❌ URL did not return valid JSON: {str(e)}"
176
+ return f"""<div class="progress-container">
177
+ <div class="progress-status">{error_message}</div>
178
+ </div>""", gr.update(visible=False)
179
+ except Exception as e:
180
+ error_message = f"❌ Unexpected error: {str(e)}"
181
+ return f"""<div class="progress-container">
182
+ <div class="progress-status">{error_message}</div>
183
+ </div>""", gr.update(visible=False)
184
+
185
+ def build_results_html(results):
186
+ # Extract the HTML generation logic to a reusable function
187
  html = '<div class="validation-results">'
188
 
189
  for i, (test_name, passed, message) in enumerate(results):
 
190
  status_class = "status-success" if passed else "status-error"
191
  status_icon = "✓" if passed else "✗"
192
 
 
193
  html += f'''
194
  <div class="validation-step" id="step-{i}">
195
  <div class="step-header" onclick="
 
215
  '''
216
 
217
  html += '</div>'
 
 
218
  return gr.update(value=html, visible=True)
219
 
220
+ def on_validate(file):
221
+ if file is None:
222
+ return gr.update(visible=False)
223
+
224
+ # Process the file and get results
225
+ results = process_file(file)
226
+ return build_results_html(results)
227
+
228
+ # Connect UI events to functions
229
  file_input.change(on_file_upload, inputs=file_input, outputs=[upload_progress, validation_results])
230
  validate_btn.click(on_validate, inputs=file_input, outputs=validation_results)
231
+ fetch_btn.click(fetch_from_url, inputs=url_input, outputs=[upload_progress, validation_results])
232
 
233
+ # Footer
234
  gr.HTML("""
235
  <div style="text-align: center; margin-top: 20px;">
236
  <p>Based on the <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant format</a> from MLCommons.</p>
237
  </div>
238
  """)
239
+
240
  return app
241
 
242
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  gradio>=3.50.2
2
  mlcroissant
3
- func_timeout
 
 
1
  gradio>=3.50.2
2
  mlcroissant
3
+ func_timeout
4
+ requests