Upload 9 files
Browse files- .gitattributes +1 -0
- .replit +39 -0
- README.md +1 -14
- app.py +200 -0
- generated-icon.png +3 -0
- pyproject.toml +17 -0
- replit.nix +23 -0
- security_scanner.py +74 -0
- utils.py +300 -0
- uv.lock +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
generated-icon.png filter=lfs diff=lfs merge=lfs -text
|
.replit
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
modules = ["python-3.11"]
|
| 2 |
+
|
| 3 |
+
[nix]
|
| 4 |
+
channel = "stable-24_05"
|
| 5 |
+
|
| 6 |
+
[deployment]
|
| 7 |
+
deploymentTarget = "autoscale"
|
| 8 |
+
run = ["sh", "-c", "streamlit run app.py"]
|
| 9 |
+
|
| 10 |
+
[workflows]
|
| 11 |
+
runButton = "Project"
|
| 12 |
+
|
| 13 |
+
[[workflows.workflow]]
|
| 14 |
+
name = "Project"
|
| 15 |
+
mode = "parallel"
|
| 16 |
+
author = "agent"
|
| 17 |
+
|
| 18 |
+
[[workflows.workflow.tasks]]
|
| 19 |
+
task = "workflow.run"
|
| 20 |
+
args = "Streamlit Server"
|
| 21 |
+
|
| 22 |
+
[[workflows.workflow]]
|
| 23 |
+
name = "Streamlit Server"
|
| 24 |
+
author = "agent"
|
| 25 |
+
|
| 26 |
+
[workflows.workflow.metadata]
|
| 27 |
+
agentRequireRestartOnSave = false
|
| 28 |
+
|
| 29 |
+
[[workflows.workflow.tasks]]
|
| 30 |
+
task = "packager.installForAll"
|
| 31 |
+
|
| 32 |
+
[[workflows.workflow.tasks]]
|
| 33 |
+
task = "shell.exec"
|
| 34 |
+
args = "streamlit run app.py"
|
| 35 |
+
waitForPort = 5000
|
| 36 |
+
|
| 37 |
+
[[ports]]
|
| 38 |
+
localPort = 5000
|
| 39 |
+
externalPort = 80
|
README.md
CHANGED
|
@@ -1,16 +1,3 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: PythonScriptShowcase
|
| 3 |
-
emoji: ⚡
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: yellow
|
| 6 |
-
sdk: streamlit
|
| 7 |
-
sdk_version: 1.42.2
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: true
|
| 10 |
-
license: mit
|
| 11 |
-
short_description: Python scripts and Hugging Face datasets
|
| 12 |
-
---
|
| 13 |
-
|
| 14 |
# Python & HuggingFace Explorer
|
| 15 |
|
| 16 |
A Streamlit-based demonstration platform for showcasing Python scripts and Hugging Face datasets with interactive visualization.
|
|
@@ -72,4 +59,4 @@ The application uses a custom styling inspired by Hugging Face:
|
|
| 72 |
|
| 73 |
## License
|
| 74 |
|
| 75 |
-
This project is open source and available under the MIT License.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Python & HuggingFace Explorer
|
| 2 |
|
| 3 |
A Streamlit-based demonstration platform for showcasing Python scripts and Hugging Face datasets with interactive visualization.
|
|
|
|
| 59 |
|
| 60 |
## License
|
| 61 |
|
| 62 |
+
This project is open source and available under the MIT License.
|
app.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from components.code_editor import render_code_editor
|
| 3 |
+
from components.dataset_explorer import render_dataset_explorer
|
| 4 |
+
from components.visualization import render_visualization
|
| 5 |
+
from components.model_metrics import render_model_metrics
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
import time
|
| 9 |
+
from utils import load_css, create_logo
|
| 10 |
+
|
| 11 |
+
# Page configuration
|
| 12 |
+
st.set_page_config(
|
| 13 |
+
page_title="Python & HuggingFace Explorer",
|
| 14 |
+
page_icon="🤗",
|
| 15 |
+
layout="wide",
|
| 16 |
+
initial_sidebar_state="expanded"
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
# Load custom CSS
|
| 20 |
+
load_css()
|
| 21 |
+
|
| 22 |
+
# Main content
|
| 23 |
+
def main():
    """Build the sidebar, seed session defaults, and render the chosen page.

    The sidebar holds the logo, the page selector, a quick dataset-name
    search box, and a footer. The selected page label is mapped to its
    render function; an unrecognised label renders nothing.
    """
    # Sidebar header: logo and page selector
    with st.sidebar:
        create_logo()
        st.title("Navigation")
        page = st.radio(
            "Select a page:",
            ["Home", "Code Editor", "Dataset Explorer", "Visualizations", "Model Metrics"]
        )

    sidebar = st.sidebar

    # Quick dataset search: remember the requested name for the explorer page
    sidebar.markdown("---")
    sidebar.subheader("Dataset Quick Search")
    dataset_name = sidebar.text_input("Enter a HuggingFace dataset name")
    if dataset_name and sidebar.button("Load Dataset"):
        st.session_state.dataset_name = dataset_name
        if page != "Dataset Explorer":
            sidebar.info("Dataset loaded! Go to Dataset Explorer to view it.")

    # Sidebar footer
    sidebar.markdown("---")
    sidebar.markdown("""
<div style="font-size: 0.8em; color: #666; text-align: center;">
<p>Built with ❤️ using</p>
<p>Streamlit & HuggingFace</p>
<p style="font-size: 0.9em; margin-top: 5px;">© 2025 Python Explorer</p>
</div>
""", unsafe_allow_html=True)

    # Session defaults (only written the first time the key is missing)
    if 'dataset_name' not in st.session_state:
        st.session_state.dataset_name = None

    if 'code_content' not in st.session_state:
        default_code = """# Sample Python code
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt

# Load a dataset from Hugging Face
dataset = load_dataset("glue", "sst2", split="train")
df = pd.DataFrame(dataset)

# Display the first few rows
print(df.head())

# Simple analysis
print(f"Number of examples: {len(df)}")
print(f"Columns: {df.columns}")

# Visualize class distribution
plt.figure(figsize=(8, 5))
df['label'].value_counts().plot(kind='bar')
plt.title('Class Distribution')
plt.xlabel('Class')
plt.ylabel('Count')
plt.tight_layout()
plt.show()
"""
        st.session_state.code_content = default_code

    # Page content: look up the renderer for the selected label
    page_renderers = {
        "Home": render_home,
        "Code Editor": render_code_editor,
        "Dataset Explorer": render_dataset_explorer,
        "Visualizations": render_visualization,
        "Model Metrics": render_model_metrics,
    }
    renderer = page_renderers.get(page)
    if renderer is not None:
        renderer()
|
| 93 |
+
|
| 94 |
+
def render_home():
    """Render the landing page: header logo, introduction, and feature cards.

    When ``assets/python_huggingface_logo.png`` exists it is shown at a
    quarter of its original size in the middle column; otherwise a plain
    text title is used. The rest of the page is static HTML.
    """
    from PIL import Image

    # Path to the logo image in the center of the page
    center_logo_path = "assets/python_huggingface_logo.png"

    # Check if the logo exists and display it
    if os.path.exists(center_logo_path):
        _, center_col, _ = st.columns([1, 2, 1])
        with center_col:
            # Context manager closes the underlying file once the pixels
            # have been copied into the resized image.
            with Image.open(center_logo_path) as image:
                width, height = image.size
                # Clamp to 1 px so very small images do not produce a
                # zero-sized target, which resize() rejects.
                quarter_size = (max(1, width // 4), max(1, height // 4))
                resized_image = image.resize(quarter_size)
            st.image(resized_image, use_container_width=True)
    else:
        st.title("Python & HuggingFace Explorer")

    # Introduction with improved styling
    st.markdown("""
<div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-bottom: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);">
<h2 style="color: #2196F3; text-align: center;">Welcome to the Explorer!</h2>
<p style="font-size: 1.1em; line-height: 1.6;">This interactive platform brings together the power of Python and the HuggingFace ecosystem.
Write and execute code, explore datasets from the HuggingFace Hub, create beautiful visualizations,
and analyze model performance metrics - all in one seamless environment.</p>
</div>
""", unsafe_allow_html=True)

    # Feature cards, two per column
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("""
<div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; height: 200px;">
<h3 style="color: #2196F3;">💻 Code Editor</h3>
<p>Write, edit, and execute Python code with syntax highlighting. See your results instantly and experiment with different scripts.</p>
<p>Features include:</p>
<ul>
<li>Syntax highlighting</li>
<li>Code execution</li>
<li>Output display</li>
</ul>
</div>
""", unsafe_allow_html=True)

        st.markdown("""
<div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px; height: 200px;">
<h3 style="color: #2196F3;">📊 Visualizations</h3>
<p>Create and customize visualizations from your datasets. Explore data through charts, graphs, and interactive plots.</p>
<p>Visualization types:</p>
<ul>
<li>Bar charts & histograms</li>
<li>Scatter plots</li>
<li>Line charts</li>
<li>And more!</li>
</ul>
</div>
""", unsafe_allow_html=True)

    with col2:
        st.markdown("""
<div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; height: 200px;">
<h3 style="color: #2196F3;">🗃️ Dataset Explorer</h3>
<p>Browse and analyze datasets from the HuggingFace Hub. Filter, sort, and examine data with ease.</p>
<p>Explorer features:</p>
<ul>
<li>Dataset previews</li>
<li>Basic statistics</li>
<li>Filtering options</li>
<li>Data exports</li>
</ul>
</div>
""", unsafe_allow_html=True)

        st.markdown("""
<div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px; height: 200px;">
<h3 style="color: #2196F3;">📈 Model Metrics</h3>
<p>Analyze model performance with detailed metrics and comparisons. Understand how your models perform on different datasets.</p>
<p>Metrics available:</p>
<ul>
<li>Accuracy, precision, recall</li>
<li>Confusion matrices</li>
<li>Performance comparisons</li>
<li>Custom metric calculations</li>
</ul>
</div>
""", unsafe_allow_html=True)

    # Getting started section
    st.markdown("""
<div style="background-color: #FFFFFF; padding: 20px; border-radius: 10px; margin-top: 20px;">
<h3 style="color: #2196F3;">Getting Started</h3>
<p>To begin exploring, select a page from the sidebar navigation. You can:</p>
<ol>
<li>Write and test Python code in the <b>Code Editor</b></li>
<li>Search for and explore datasets in the <b>Dataset Explorer</b></li>
<li>Create visualizations in the <b>Visualizations</b> section</li>
<li>Analyze model performance in the <b>Model Metrics</b> page</li>
</ol>
<p>Ready to dive in? Select a page from the sidebar to get started!</p>
</div>
""", unsafe_allow_html=True)
|
| 198 |
+
|
| 199 |
+
if __name__ == "__main__":
|
| 200 |
+
main()
|
generated-icon.png
ADDED
|
|
Git LFS Details
|
pyproject.toml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "repl-nix-workspace"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Streamlit app for exploring Python scripts and Hugging Face datasets"
|
| 5 |
+
requires-python = ">=3.11"
|
| 6 |
+
dependencies = [
|
| 7 |
+
"datasets>=3.3.2",
|
| 8 |
+
"matplotlib>=3.10.1",
|
| 9 |
+
"numpy>=2.2.3",
|
| 10 |
+
"pandas>=2.2.3",
|
| 11 |
+
"pillow>=11.1.0",
|
| 12 |
+
"plotly>=6.0.0",
|
| 13 |
+
"scikit-learn>=1.6.1",
|
| 14 |
+
"seaborn>=0.13.2",
|
| 15 |
+
"streamlit>=1.42.2",
|
| 16 |
+
"transformers>=4.49.0",
|
| 17 |
+
]
|
replit.nix
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{pkgs}: {
|
| 2 |
+
deps = [
|
| 3 |
+
pkgs.zlib
|
| 4 |
+
pkgs.openjpeg
|
| 5 |
+
pkgs.libxcrypt
|
| 6 |
+
pkgs.libwebp
|
| 7 |
+
pkgs.libtiff
|
| 8 |
+
pkgs.libjpeg
|
| 9 |
+
pkgs.libimagequant
|
| 10 |
+
pkgs.lcms2
|
| 11 |
+
pkgs.tk
|
| 12 |
+
pkgs.tcl
|
| 13 |
+
pkgs.qhull
|
| 14 |
+
pkgs.pkg-config
|
| 15 |
+
pkgs.gtk3
|
| 16 |
+
pkgs.gobject-introspection
|
| 17 |
+
pkgs.ghostscript
|
| 18 |
+
pkgs.freetype
|
| 19 |
+
pkgs.ffmpeg-full
|
| 20 |
+
pkgs.cairo
|
| 21 |
+
pkgs.glibcLocales
|
| 22 |
+
];
|
| 23 |
+
}
|
security_scanner.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import requests
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any, Optional
|
| 6 |
+
|
| 7 |
+
def scan_code_for_security(
    code: str,
    api_key: Optional[str] = None,
    timeout: float = 30.0,
) -> Dict[str, Any]:
    """
    Scan code for security vulnerabilities using the CodePal Security Scanner API.

    Args:
        code: The code to scan as a string
        api_key: Your CodePal API key (falls back to the CODEPAL_API_KEY
            environment variable)
        timeout: Seconds to wait for the API before giving up

    Returns:
        Dict containing the decoded JSON API response

    Raises:
        ValueError: If no API key is provided or found in the environment
        requests.RequestException: If the API request fails (connection
            error, timeout, or non-2xx status)
    """
    # Get API key from parameter or environment
    api_key = api_key or os.environ.get('CODEPAL_API_KEY')

    if not api_key:
        raise ValueError(
            "API key is required. Either pass it as a parameter or set "
            "the CODEPAL_API_KEY environment variable."
        )

    # API endpoint and headers
    url = "https://api.codepal.ai/v1/security-code-scanner/query"
    headers = {
        "Authorization": f"Bearer {api_key}"
    }

    # A (None, value) tuple sends the field as multipart form data without
    # a filename.
    files = {
        'code': (None, code)
    }

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.post(url, headers=headers, files=files, timeout=timeout)
        response.raise_for_status()  # Raise exception for non-2xx status codes

        return response.json()
    except requests.RequestException as e:
        print(f"Error scanning code: {e}")
        # Read the response from the exception: the local name is unbound
        # when the request never completed, and a Response object is falsy
        # for 4xx/5xx statuses, so compare against None explicitly.
        failed_response = getattr(e, "response", None)
        if failed_response is not None:
            print(f"Response content: {failed_response.text}")
        raise
|
| 56 |
+
|
| 57 |
+
if __name__ == "__main__":
    # Example usage: a deliberately unsafe snippet for the scanner to flag
    sample_code = """
import os

def run_command(user_input):
    os.system(user_input)

run_command("ls")
"""

    # The API key is taken from the CODEPAL_API_KEY environment variable
    # because none is passed here.
    try:
        result = scan_code_for_security(sample_code)
    except Exception as e:
        print(f"Failed to scan code: {e}")
        # Exit non-zero so shells and CI can tell the scan did not run.
        raise SystemExit(1)
    else:
        print(json.dumps(result, indent=2))
|
utils.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import os
|
| 4 |
+
import base64
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import seaborn as sns
|
| 8 |
+
import numpy as np
|
| 9 |
+
from datasets import load_dataset
|
| 10 |
+
|
| 11 |
+
def load_css(css_path="styles/custom.css"):
    """Inject a custom stylesheet into the Streamlit page.

    Args:
        css_path: Path of the CSS file to load. Defaults to the project's
            ``styles/custom.css``.

    If the file cannot be read, a warning is shown and the page keeps
    Streamlit's default styling instead of failing at start-up.
    """
    try:
        # Explicit encoding so the result does not depend on the host locale.
        css = Path(css_path).read_text(encoding="utf-8")
    except OSError as exc:
        st.warning(f"Custom stylesheet could not be loaded ({exc}); using default styling.")
        return

    st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
|
| 15 |
+
|
| 16 |
+
def create_logo():
    """Show the logo image, or a styled text heading when the image is missing."""
    from PIL import Image

    # Location of the logo relative to the working directory
    logo_path = "assets/python_huggingface_logo.png"

    # No image on disk: fall back to a text heading and stop
    if not os.path.exists(logo_path):
        st.markdown(
            """
<div style="display: flex; justify-content: center; margin-bottom: 20px;">
<h2 style="color: #2196F3;">Python & HuggingFace Explorer</h2>
</div>
""",
            unsafe_allow_html=True
        )
        return

    # Image found: display it at a fixed width
    logo = Image.open(logo_path)
    st.image(logo, width=200)
|
| 39 |
+
|
| 40 |
+
def _warn_about_load_failure(dataset_name, error):
    """Show a targeted hint for common dataset loading failures."""
    message = str(error).lower()
    if "connection error" in message or "timeout" in message:
        st.warning("Network issue detected. Please check your internet connection and try again.")
    elif "not found" in message:
        st.warning(f"Dataset '{dataset_name}' not found. Please check the dataset name and try again.")


def _load_first_available_split(dataset_name):
    """Return one loaded split of the dataset, or None when every attempt fails."""
    try:
        # First try the default configuration and take its first split
        dataset = load_dataset(dataset_name, streaming=False)
        first_split = next(iter(dataset.keys()))
        return dataset[first_split]
    except Exception as e:
        st.warning(f"Couldn't load dataset with default configuration: {str(e)}. Trying specific splits...")

    # If that fails, request the conventional split names one by one
    last_error = None
    for split_name in ("train", "test", "validation"):
        try:
            st.info(f"Trying to load '{split_name}' split...")
            return load_dataset(dataset_name, split=split_name, streaming=False)
        except Exception as split_error:
            last_error = split_error

    st.error(f"Failed to load dataset with any standard split: {str(last_error)}")
    # Hints are issued here because this is where load failures end up;
    # they never reach the caller's exception handler.
    _warn_about_load_failure(dataset_name, last_error)
    return None


def get_dataset_info(dataset_name):
    """Get basic information about a HuggingFace dataset.

    Args:
        dataset_name: Name of the dataset to pass to ``load_dataset``.

    Returns:
        ``(info, data)`` where ``info`` is a dict with the keys "Dataset",
        "Number of examples", "Features" and "Sample", and ``data`` is the
        loaded split. Returns ``(None, None)`` when the name is invalid or
        the dataset cannot be loaded; the reason is reported in the UI.
    """
    if not dataset_name or not isinstance(dataset_name, str):
        st.error("Invalid dataset name")
        return None, None

    try:
        st.info(f"Loading dataset: {dataset_name}...")

        data = _load_first_available_split(dataset_name)
        if data is None:
            return None, None

        # Get basic info
        info = {
            "Dataset": dataset_name,
            "Number of examples": len(data),
            "Features": list(data.features.keys()),
            "Sample": data[0] if len(data) > 0 else None
        }

        st.success(f"Successfully loaded dataset with {info['Number of examples']} examples")
        return info, data
    except Exception as e:
        st.error(f"Error loading dataset: {str(e)}")
        _warn_about_load_failure(dataset_name, e)
        return None, None
|
| 87 |
+
|
| 88 |
+
def run_code(code):
    """Run Python code and capture its output, errors, and new figures.

    Args:
        code: Python source text to execute.

    Returns:
        Dict with three keys:
        "output" - captured stdout, followed by captured stderr under a
            "--- Warnings/Errors ---" header when there is any;
        "error" - "<ExceptionType>: <message>" if execution failed or was
            rejected, otherwise an empty string;
        "figures" - matplotlib figures created during this run, in
            ascending figure-number order.

    SECURITY NOTE: this calls ``exec`` on the submitted text with the full
    builtins available. The substring blocklist below is easy to bypass and
    is not a sandbox; do not expose this to untrusted users without real
    process-level isolation.
    """
    import io
    import time
    from contextlib import redirect_stdout, redirect_stderr

    # Create StringIO objects to capture stdout and stderr
    stdout_capture = io.StringIO()
    stderr_capture = io.StringIO()

    # Dictionary for storing results
    results = {
        "output": "",
        "error": "",
        "figures": []
    }

    # Safety check - limit code size
    if len(code) > 100000:
        results["error"] = "Code submission too large. Please reduce the size."
        return results

    # Basic security check - plain substring match, not comprehensive
    dangerous_imports = ['os.system', 'subprocess', 'eval(', 'shutil.rmtree', 'open(', 'with open']
    for dangerous_import in dangerous_imports:
        if dangerous_import in code:
            results["error"] = f"Potential security risk: {dangerous_import} is not allowed."
            return results

    # Remember existing figures so only ones created by this run are returned
    initial_figs = set(plt.get_fignums())

    # NOTE: this limit is only checked after exec() returns. It reports slow
    # code as an error but cannot interrupt code that never finishes.
    MAX_EXECUTION_TIME = 30  # seconds
    start_time = time.time()

    try:
        # Globals pre-populated with common libraries (builtins are NOT restricted)
        safe_globals = {
            'plt': plt,
            'pd': pd,
            'np': np,
            'sns': sns,
            'print': print,
            '__builtins__': __builtins__,
        }

        # Add common data science libraries when they are installed
        for module_name in ['datasets', 'transformers', 'sklearn', 'math']:
            try:
                safe_globals[module_name] = __import__(module_name)
            except ImportError:
                pass  # Module not available

        # Redirect stdout and stderr while the submitted code runs
        with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
            exec(code, safe_globals)

        if time.time() - start_time > MAX_EXECUTION_TIME:
            raise TimeoutError("Code execution exceeded maximum allowed time.")

    except (Exception, SystemExit) as e:
        # SystemExit is included so a stray exit()/sys.exit() in the
        # submitted code is reported instead of propagating to the caller.
        results["error"] = f"{type(e).__name__}: {str(e)}"

    # Collect output even after a failure so text printed before the
    # exception is not lost.
    results["output"] = stdout_capture.getvalue()

    stderr_output = stderr_capture.getvalue()
    if stderr_output:
        if results["output"]:
            results["output"] += "\n\n--- Warnings/Errors ---\n" + stderr_output
        else:
            results["output"] = "--- Warnings/Errors ---\n" + stderr_output

    # Return figures created by this run, sorted for a deterministic order
    for fig_num in sorted(set(plt.get_fignums()) - initial_figs):
        results["figures"].append(plt.figure(fig_num))

    return results
|
| 176 |
+
|
| 177 |
+
def get_dataset_preview(data, max_rows=10):
    """Build a pandas DataFrame from the first ``max_rows`` rows of ``data``.

    Returns the DataFrame, or None (after reporting the error in the UI)
    when slicing or conversion fails.
    """
    try:
        # Slice first, then hand the slice to pandas
        head_rows = data[:max_rows]
        preview = pd.DataFrame(head_rows)
    except Exception as exc:
        st.error(f"Error converting dataset to DataFrame: {str(exc)}")
        return None
    return preview
|
| 186 |
+
|
| 187 |
+
def generate_basic_stats(data):
    """Generate basic per-column statistics for a dataset.

    Args:
        data: Anything ``pd.DataFrame`` accepts (e.g. a list of row dicts).

    Returns:
        Dict mapping each column name to a dict of statistics:
        numeric columns get "mean", "median", "std", "min", "max";
        string/object columns get "unique_values" and "most_common"
        (top five value counts, or "Too many unique values" when there are
        100 or more distinct values); every column gets "missing".
        Returns None, after reporting the error in the UI, on failure.
    """
    try:
        # Convert to pandas DataFrame
        df = pd.DataFrame(data)

        stats = {}

        for col in df.columns:
            series = df[col]
            col_stats = {}

            if pd.api.types.is_numeric_dtype(series):
                col_stats["mean"] = series.mean()
                col_stats["median"] = series.median()
                col_stats["std"] = series.std()
                col_stats["min"] = series.min()
                col_stats["max"] = series.max()
            elif pd.api.types.is_string_dtype(series) or pd.api.types.is_object_dtype(series):
                try:
                    countable = series
                    unique_count = countable.nunique()
                except TypeError:
                    # Cells such as lists or dicts are unhashable; count
                    # their string form so one such column does not abort
                    # the statistics for the whole dataset.
                    countable = series.dropna().astype(str)
                    unique_count = countable.nunique()

                col_stats["unique_values"] = unique_count
                if unique_count < 100:
                    col_stats["most_common"] = countable.value_counts().head(5).to_dict()
                else:
                    col_stats["most_common"] = "Too many unique values"

            # Missing-value count is meaningful for every dtype
            col_stats["missing"] = series.isna().sum()

            stats[col] = col_stats

        return stats
    except Exception as e:
        st.error(f"Error generating statistics: {str(e)}")
        return None
|
| 222 |
+
|
| 223 |
+
def create_visualization(data, viz_type, x_col=None, y_col=None, hue_col=None):
    """Create a seaborn visualization for the selected type and columns.

    Args:
        data: Anything ``pd.DataFrame`` accepts.
        viz_type: One of "Bar Chart", "Line Chart", "Scatter Plot",
            "Box Plot" (need X and Y) or "Histogram", "Count Plot"
            (need X only).
        x_col: Column name for the X axis.
        y_col: Column name for the Y axis (ignored by X-only plots).
        hue_col: Optional column used for colour grouping.

    Returns:
        The matplotlib figure, or None when required columns are missing,
        the type is unsupported, or plotting fails. The reason is shown in
        the UI in each case.
    """
    # Plot types that need both axes: (seaborn function, warning text)
    xy_plots = {
        "Bar Chart": (sns.barplot, "Bar charts require both X and Y columns."),
        "Line Chart": (sns.lineplot, "Line charts require both X and Y columns."),
        "Scatter Plot": (sns.scatterplot, "Scatter plots require both X and Y columns."),
        "Box Plot": (sns.boxplot, "Box plots require both X and Y columns."),
    }
    # Plot types that need only the X column
    x_only_warnings = {
        "Histogram": "Histograms require an X column.",
        "Count Plot": "Count plots require an X column.",
    }

    # Validate before creating a figure so rejected requests do not leave
    # an unused figure registered with pyplot.
    if viz_type in xy_plots:
        if not (x_col and y_col):
            st.warning(xy_plots[viz_type][1])
            return None
    elif viz_type in x_only_warnings:
        if not x_col:
            st.warning(x_only_warnings[viz_type])
            return None
    else:
        st.warning(f"Unsupported visualization type: {viz_type}")
        return None

    fig = None
    try:
        df = pd.DataFrame(data)

        fig, ax = plt.subplots(figsize=(10, 6))

        if viz_type in xy_plots:
            plot_func = xy_plots[viz_type][0]
            plot_func(x=x_col, y=y_col, hue=hue_col, data=df, ax=ax)
            ax.set_title(f"{viz_type} of {y_col} vs {x_col}")
            ax.set_ylabel(y_col)
        else:
            if viz_type == "Histogram":
                sns.histplot(df[x_col], ax=ax)
            else:
                sns.countplot(x=x_col, hue=hue_col, data=df, ax=ax)
            # Keep seaborn's own y-axis label for single-column plots
            ax.set_title(f"{viz_type} of {x_col}")

        ax.set_xlabel(x_col)
        fig.tight_layout()

        return fig

    except Exception as e:
        # Release the half-built figure so failures do not accumulate
        # open figures in pyplot's registry.
        if fig is not None:
            plt.close(fig)
        st.error(f"Error creating visualization: {str(e)}")
        return None
|
| 283 |
+
|
| 284 |
+
def get_popular_datasets(category=None, limit=10):
    """Return up to ``limit`` popular HuggingFace dataset names.

    With a known ``category`` only that category's names are returned;
    with no category, or an unknown one, the names of all categories are
    concatenated in declaration order before the limit is applied.
    """
    catalog = {
        "Text": ["glue", "imdb", "squad", "wikitext", "ag_news"],
        "Image": ["cifar10", "cifar100", "mnist", "fashion_mnist", "coco"],
        "Audio": ["common_voice", "librispeech_asr", "voxpopuli", "voxceleb", "audiofolder"],
        "Multimodal": ["conceptual_captions", "flickr8k", "hateful_memes", "nlvr", "vqa"]
    }

    # No usable category: flatten every category into one list
    if not category or category not in catalog:
        flattened = [name for names in catalog.values() for name in names]
        return flattened[:limit]

    return catalog[category][:limit]
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|