Spaces:
Build error
Build error
Commit
·
dffeab2
1
Parent(s):
09d0101
Added download button for JSON file download.
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ from bs4 import BeautifulSoup
|
|
3 |
import re
|
4 |
from urllib.parse import urlparse
|
5 |
import gradio as gr
|
|
|
6 |
|
7 |
|
8 |
def extract_wikipedia_text(raw_text, language):
|
@@ -47,10 +48,13 @@ def scrape(url):
|
|
47 |
)
|
48 |
contents = extract_wikipedia_text(raw_text, language)
|
49 |
json_output = {"source": url, f"title-{language}": title, "pages": contents}
|
50 |
-
|
|
|
|
|
51 |
|
52 |
|
53 |
-
|
|
|
54 |
gr.Markdown(
|
55 |
f"""
|
56 |
<center>
|
@@ -60,8 +64,10 @@ with gr.Blocks() as demo:
|
|
60 |
)
|
61 |
with gr.Row():
|
62 |
inp = gr.Textbox(placeholder="Wikipedia URL")
|
63 |
-
|
|
|
|
|
64 |
btn = gr.Button("Scrape")
|
65 |
-
btn.click(fn=scrape, inputs=inp, outputs=out)
|
66 |
|
67 |
-
demo.launch()
|
|
|
3 |
import re
|
4 |
from urllib.parse import urlparse
|
5 |
import gradio as gr
|
6 |
+
import json
|
7 |
|
8 |
|
9 |
def extract_wikipedia_text(raw_text, language):
|
|
|
48 |
)
|
49 |
contents = extract_wikipedia_text(raw_text, language)
|
50 |
json_output = {"source": url, f"title-{language}": title, "pages": contents}
|
51 |
+
with open("data.json", "w") as f:
|
52 |
+
json.dump(json_output, f)
|
53 |
+
return json_output, "data.json"
|
54 |
|
55 |
|
56 |
+
style_sheet = "#json-output { max-height: 400px; overflow-y: auto; }"
|
57 |
+
with gr.Blocks(css=style_sheet) as demo:
|
58 |
gr.Markdown(
|
59 |
f"""
|
60 |
<center>
|
|
|
64 |
)
|
65 |
with gr.Row():
|
66 |
inp = gr.Textbox(placeholder="Wikipedia URL")
|
67 |
+
with gr.Column():
|
68 |
+
out = gr.JSON(elem_id="json-output")
|
69 |
+
out_download = gr.File()
|
70 |
btn = gr.Button("Scrape")
|
71 |
+
btn.click(fn=scrape, inputs=inp, outputs=[out, out_download])
|
72 |
|
73 |
+
demo.launch(debug=True)
|