Spaces:
Sleeping
Sleeping
import matplotlib.pyplot as plt
import nbformat
import pandas as pd
import streamlit as st
from nbconvert import HTMLExporter
from streamlit.components.v1 import html
from wordcloud import WordCloud

# Column names the pages below read from the dataset. Adjust them to match
# the CSV header.
# NOTE(review): pandas column lookups are case-sensitive -- confirm the
# capitalisation of each name against the actual CSV header.
category_column = 'category'
job_title_column = 'title'
description_column = 'Description'
key_column = 'key'
date_column = 'Date'

# Load the categorised jobs dataset; this runs on every script execution.
file_path = 'category upwork jobs.csv'
jobs_df = pd.read_csv(file_path)

# Sidebar navigation: the chosen entry decides which page is rendered.
st.sidebar.title("Navigation")
option = st.sidebar.radio(
    "Go to",
    ["Home", "Plots", "Notebook", "Download Datasets"],
)
# Home page: browse the jobs of a single category picked in the sidebar.
if option == "Home":
    st.title("Jobs Dashboard")

    # The category filter sits in the sidebar, below the navigation menu.
    st.sidebar.header("Filter Jobs by Category")
    # Missing categories are dropped from the choices: NaN never compares
    # equal to anything, so selecting it could only produce an empty table.
    categories = jobs_df[category_column].dropna().unique()
    selected_category = st.sidebar.selectbox("Choose a category:", categories)

    # Keep only the rows whose category matches the selection.
    filtered_jobs = jobs_df[jobs_df[category_column] == selected_category]

    st.write(f"Showing jobs in category: **{selected_category}**")

    # Build the display columns from the configured names instead of
    # hard-coded literals. Each name is matched exactly first and then
    # case-insensitively, because pandas lookups are case-sensitive and the
    # configured names ('Description', 'Date') differ in case from the
    # literals previously used here ('description', 'date').
    columns_by_lower = {str(col).lower(): col for col in filtered_jobs.columns}
    wanted_columns = [job_title_column, key_column, description_column, date_column]
    display_columns = []
    missing_columns = []
    for wanted in wanted_columns:
        if wanted in filtered_jobs.columns:
            display_columns.append(wanted)
        elif wanted.lower() in columns_by_lower:
            display_columns.append(columns_by_lower[wanted.lower()])
        else:
            missing_columns.append(wanted)

    # Report absent columns instead of failing the whole page with KeyError.
    if missing_columns:
        st.warning(
            f"Columns not found in the dataset and skipped: {', '.join(missing_columns)}"
        )
    st.dataframe(filtered_jobs[display_columns])

    # Row count for the selected category.
    st.write(f"Total jobs in this category: {len(filtered_jobs)}")
# Plots page: charts computed over the whole dataset (the Home-page category
# filter is not applied here).
if option == "Plots":
    st.title("Job Visualization")

    # 1. Bar chart: number of jobs per category.
    st.subheader("Job Category Distribution")
    category_counts = jobs_df[category_column].value_counts()
    fig1, ax1 = plt.subplots()
    ax1.bar(category_counts.index, category_counts.values)
    ax1.set_xlabel("Job Category")
    ax1.set_ylabel("Number of Jobs")
    ax1.set_title("Distribution of Jobs Across Categories")
    # Rotate the labels through the Axes object rather than plt.xticks, so
    # the call does not depend on pyplot's implicit "current axes".
    plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")
    st.pyplot(fig1)
    # pyplot keeps every figure alive until it is closed; the script reruns
    # on each interaction, so unclosed figures would pile up in memory.
    plt.close(fig1)

    # 2. Pie chart: share of each category.
    st.subheader("Job Category Proportions")
    fig2, ax2 = plt.subplots(figsize=(10, 25))
    ax2.pie(
        category_counts,
        labels=category_counts.index,
        autopct='%1.1f%%',
        startangle=140,
    )
    ax2.axis('equal')  # Equal aspect ratio keeps the pie circular.
    st.pyplot(fig2)
    plt.close(fig2)

    # 3. Bar chart: the ten most frequent job titles.
    st.subheader("Top Job Titles")
    top_job_titles = jobs_df[job_title_column].value_counts().head(10)
    fig3, ax3 = plt.subplots()
    ax3.bar(top_job_titles.index, top_job_titles.values)
    ax3.set_xlabel("Job Title")
    ax3.set_ylabel("Count")
    ax3.set_title("Top 10 Job Titles")
    plt.setp(ax3.get_xticklabels(), rotation=45, ha="right")
    st.pyplot(fig3)
    plt.close(fig3)

    # 4. Word cloud built from all job descriptions.
    st.subheader("Word Cloud for Job Descriptions")
    # Match the configured column name exactly first, then ignoring case:
    # pandas lookups are case-sensitive and the header's capitalisation is
    # not guaranteed to match the configured name.
    columns_by_lower = {str(col).lower(): col for col in jobs_df.columns}
    if description_column in jobs_df.columns:
        resolved_description = description_column
    else:
        resolved_description = columns_by_lower.get(description_column.lower())

    if resolved_description is None:
        st.warning(f"Column not found in the dataset: {description_column}")
    else:
        # astype(str) protects the join against non-string cells.
        description_text = ' '.join(
            jobs_df[resolved_description].dropna().astype(str)
        )
        if not description_text.strip():
            # WordCloud.generate raises when it is given no words at all.
            st.info("No job descriptions available to build a word cloud.")
        else:
            wordcloud = WordCloud(
                width=800,
                height=400,
                background_color='white',
            ).generate(description_text)
            fig4, ax4 = plt.subplots(figsize=(10, 5))
            ax4.imshow(wordcloud, interpolation='bilinear')
            ax4.axis('off')  # The image needs no axes or ticks.
            st.pyplot(fig4)
            plt.close(fig4)
# Notebook page: render the project's Jupyter notebook as embedded HTML.
if option == "Notebook":
    st.title("Jupyter Notebook")

    notebook_path = 'upwork_dashboard.ipynb'  # Update with the actual path to your notebook
    try:
        # Notebook files are UTF-8 JSON; an explicit encoding avoids decode
        # errors on platforms whose default text encoding differs.
        with open(notebook_path, encoding="utf-8") as f:
            notebook_content = nbformat.read(f, as_version=4)
    except FileNotFoundError:
        # Show a readable message instead of a raw traceback.
        st.error(f"Notebook file not found: {notebook_path}")
    else:
        # Prepend a markdown cell linking to the Google Colab copy so the
        # link appears at the top of the rendered notebook.
        colab_link = "[Open in Google Colab](https://colab.research.google.com/drive/1qoTldQ-Kr6DgePRNYgdlQqqHq5JQax0h?usp=sharing)"
        new_cell = nbformat.v4.new_markdown_cell(colab_link)
        notebook_content.cells.insert(0, new_cell)

        # Convert the (in-memory) notebook to standalone HTML.
        html_exporter = HTMLExporter()
        html_exporter.exclude_input = False  # Include code cells in the notebook display
        notebook_html, _ = html_exporter.from_notebook_node(notebook_content)

        # Embed the HTML in a scrollable component.
        html(notebook_html, height=800, scrolling=True)
# Download page: offer each dataset as a CSV download and preview it below
# its download button.
if option == "Download Datasets":
    st.title("Download Datasets")

    st.markdown("Click the links below to download the datasets:")

    # (path on disk, button label, filename suggested to the browser)
    # NOTE(review): 'origina' looks like a typo for 'original'; the suggested
    # filename is kept unchanged here -- confirm before renaming.
    datasets = [
        (
            "category upwork jobs.csv",
            "Download Category Upwork Jobs Dataset",
            'category_upwork_jobs.csv',
        ),
        (
            "jobs.csv",
            "Download Original Dataset",
            'origina scraped data.csv',
        ),
    ]

    for source_path, button_label, download_name in datasets:
        try:
            # Read the raw bytes up front so the download button is not
            # handed a file handle tied to the `with` block.
            with open(source_path, 'rb') as f:
                payload = f.read()
        except FileNotFoundError:
            # One missing file must not block the other dataset.
            st.error(f"Dataset file not found: {source_path}")
            continue

        st.download_button(
            label=button_label,
            data=payload,
            file_name=download_name,
            mime='text/csv',
        )
        # Preview of the dataset offered by the button above.
        st.dataframe(pd.read_csv(source_path))