"""Streamlit app that combines several Hugging Face datasets and saves them."""

import os

import streamlit as st
from datasets import concatenate_datasets, load_dataset


def load_and_combine_datasets():
    """Load the ``train`` split of each source dataset and concatenate them.

    Returns:
        The dataset returned by ``concatenate_datasets`` for the three inputs.
    """
    # NOTE(review): concatenate_datasets presumably needs the inputs to share
    # a compatible column schema -- confirm these three datasets line up.
    python_codes_dataset = load_dataset("flytech/python-codes-25k", split="train")
    streamlit_issues_dataset = load_dataset("andfanilo/streamlit-issues", split="train")
    streamlit_docs_dataset = load_dataset("sai-lohith/streamlit_docs", split="train")
    return concatenate_datasets(
        [python_codes_dataset, streamlit_issues_dataset, streamlit_docs_dataset]
    )


def save_combined_dataset(combined_dataset, dataset_name):
    """Save ``combined_dataset`` to disk under ``dataset_name`` and report it.

    Args:
        combined_dataset: Object exposing ``save_to_disk(path)``.
        dataset_name: Destination path handed to ``save_to_disk``.
    """
    combined_dataset.save_to_disk(dataset_name)
    # Tell the user where the dataset was written.
    st.write(f"Dataset saved at: {dataset_name}")


def _path_size_bytes(path):
    """Return the size of a file, or the total size of all files under a directory."""
    if os.path.isfile(path):
        return os.path.getsize(path)
    # os.path.getsize on a directory only reports the directory entry itself,
    # so add up every file beneath it instead.
    return sum(
        os.path.getsize(os.path.join(root, filename))
        for root, _dirs, filenames in os.walk(path)
        for filename in filenames
    )


def main():
    """Render the viewer page with its save and download actions."""
    st.title("Combined Dataset Viewer")

    # Load and combine datasets.
    combined_dataset = load_and_combine_datasets()

    # Display a subset of the combined dataset.
    st.write("Subset of Combined Dataset:", combined_dataset[:10])

    # The name input is rendered unconditionally: st.button is only True on
    # the rerun caused by its click, so an input nested under the button would
    # vanish (and its value never be acted on) as soon as the user typed.
    dataset_name = st.text_input("Enter a name for the combined dataset:").strip()

    # Option to save the combined dataset.
    if st.button("Save Combined Dataset"):
        if dataset_name:
            save_combined_dataset(combined_dataset, dataset_name)
            st.success(f"Combined dataset saved as '{dataset_name}'!")
        else:
            st.warning("Please enter a name for the combined dataset first.")

    # Option to save the combined dataset and report its location and size.
    if st.button("Download Combined Dataset"):
        if dataset_name:
            save_combined_dataset(combined_dataset, dataset_name)
            filepath = os.path.join(os.getcwd(), dataset_name)
            # The saved dataset is a directory, so sum its files for the size.
            filesize = _path_size_bytes(filepath) / (1024 * 1024)  # Size in MB
            # NOTE(review): this link targets a server-side filesystem path; a
            # real browser download would need an archive served through
            # st.download_button.
            st.write(f"Download the combined dataset: [{dataset_name}]({filepath})")
            st.write(f"File Size: {filesize:.2f} MB")
        else:
            st.warning("Please enter a name for the combined dataset first.")


if __name__ == "__main__":
    main()