Saboorhsn committed on
Commit
30585ac
·
verified ·
1 Parent(s): 18adb0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  from datasets import load_dataset, concatenate_datasets
3
  import json
 
4
 
5
  def load_and_combine_datasets():
6
  python_codes_dataset = load_dataset('flytech/python-codes-25k', split='train')
@@ -18,7 +19,7 @@ def save_combined_dataset_as_jsonl(combined_dataset, file_path):
18
  f.write('\n')
19
 
20
  def main():
21
- st.title("Combined Dataset Viewer")
22
 
23
  # Load and combine datasets
24
  combined_dataset = load_and_combine_datasets()
@@ -26,18 +27,25 @@ def main():
26
  # Display a subset of the combined dataset
27
  st.write("Subset of Combined Dataset:", combined_dataset[:10])
28
 
 
 
 
29
  # Add option to save the combined dataset as JSONL
30
  if st.button("Save Combined Dataset (JSONL)"):
31
- file_path = "combined_dataset.jsonl"
32
  save_combined_dataset_as_jsonl(combined_dataset, file_path)
33
  st.write(f"Combined dataset saved as JSONL file: {file_path}")
34
 
35
  # Add option to download the JSONL file
36
  if st.button("Download Combined Dataset (JSONL)"):
37
- file_path = "combined_dataset.jsonl"
38
  save_combined_dataset_as_jsonl(combined_dataset, file_path)
39
  st.write("Download the combined dataset as JSONL file:")
40
- st.download_button(label="Download", data=open(file_path, "rb"), file_name="combined_dataset.jsonl")
 
 
 
 
41
 
42
  if __name__ == "__main__":
43
  main()
 
1
  import streamlit as st
2
  from datasets import load_dataset, concatenate_datasets
3
  import json
4
+ import os
5
 
6
  def load_and_combine_datasets():
7
  python_codes_dataset = load_dataset('flytech/python-codes-25k', split='train')
 
19
  f.write('\n')
20
 
21
  def main():
22
+ st.title("Combined Dataset Viewer and Downloader")
23
 
24
  # Load and combine datasets
25
  combined_dataset = load_and_combine_datasets()
 
27
  # Display a subset of the combined dataset
28
  st.write("Subset of Combined Dataset:", combined_dataset[:10])
29
 
30
+ # Take input for output dataset name
31
+ output_dataset_name = st.text_input("Enter output dataset name (without extension):", "combined_dataset")
32
+
33
  # Add option to save the combined dataset as JSONL
34
  if st.button("Save Combined Dataset (JSONL)"):
35
+ file_path = os.path.join(os.getcwd(), f"{output_dataset_name}.jsonl")
36
  save_combined_dataset_as_jsonl(combined_dataset, file_path)
37
  st.write(f"Combined dataset saved as JSONL file: {file_path}")
38
 
39
  # Add option to download the JSONL file
40
  if st.button("Download Combined Dataset (JSONL)"):
41
+ file_path = os.path.join(os.getcwd(), f"{output_dataset_name}.jsonl")
42
  save_combined_dataset_as_jsonl(combined_dataset, file_path)
43
  st.write("Download the combined dataset as JSONL file:")
44
+ with open(file_path, "rb") as f:
45
+ bytes_data = f.read()
46
+ b64 = base64.b64encode(bytes_data).decode()
47
+ href = f'<a href="data:file/jsonl;base64,{b64}" download="{output_dataset_name}.jsonl">Download JSONL File</a>'
48
+ st.markdown(href, unsafe_allow_html=True)
49
 
50
  if __name__ == "__main__":
51
  main()