# Spaces: Sleeping  (residue from the hosting page captured with this file; not part of the program)
import asyncio | |
import pandas as pd | |
from concurrent.futures import ThreadPoolExecutor | |
from flask import Flask, request, render_template | |
from catboost import CatBoostClassifier # Import CatBoost | |
from url_process import extract_url_features # Ensure you have the appropriate feature extraction function | |
import os | |
# Batch Processing: Ensures URLs are processed in manageable chunks
def process_urls_in_batches(urls, batch_size=10):
    """Yield consecutive slices of ``urls`` with at most ``batch_size`` items.

    Args:
        urls: A sequence supporting ``len()`` and slicing (for example a
            list of URL strings).
        batch_size: Maximum number of items per yielded slice. Must be
            positive.

    Yields:
        Slices of ``urls`` in their original order; the last slice may be
        shorter than ``batch_size``.

    Raises:
        ValueError: If ``batch_size`` is zero or negative. Because this is
            a generator, the error surfaces when iteration starts.
    """
    # A zero step makes range() fail with an unrelated-looking message and a
    # negative step silently produces no batches, so reject both up front.
    if batch_size <= 0:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size!r}"
        )
    for start in range(0, len(urls), batch_size):
        yield urls[start:start + batch_size]
# Awaitable wrapper that runs the synchronous feature extractor in a worker
# thread so the caller's event loop is not blocked while it executes.
async def async_extract_features(url):
    """Return ``extract_url_features(url)``, executed via ``asyncio.to_thread``.

    Args:
        url: The value forwarded unchanged to ``extract_url_features``.

    Returns:
        Whatever ``extract_url_features`` returns for ``url``.
    """
    return await asyncio.to_thread(extract_url_features, url)
# Thread pool fan-out of feature extraction over many URLs.
# NOTE(review): threads overlap waiting (I/O) rather than CPU work; this
# presumably helps because extraction performs network lookups - confirm
# against url_process.extract_url_features.
def extract_features_in_parallel(urls, max_workers=5):
    """Apply ``extract_url_features`` to every URL using a thread pool.

    Args:
        urls: Iterable of URLs; each item is passed to
            ``extract_url_features``.
        max_workers: Upper bound on concurrently running worker threads.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        A list of extraction results in the same order as ``urls``.

    Raises:
        Any exception raised by ``extract_url_features`` for an item is
        re-raised while the results are being collected.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map preserves input order; list() drains it before the
        # pool is shut down by the context manager.
        return list(executor.map(extract_url_features, urls))
# Loaded CatBoost models keyed by model path, so each model file is read from
# disk once per process instead of on every prediction call.
_CATBOOST_MODEL_CACHE = {}


# Load the CatBoost model (once per path) and run inference
def predict_with_catboost(features_df, model_path):
    """Return CatBoost predictions for ``features_df``.

    The model stored at ``model_path`` is loaded on first use and then
    reused for later calls with the same path. If the file is replaced while
    the process is running, the already-loaded model keeps being used until
    the process restarts.

    Args:
        features_df: Feature rows passed directly to ``model.predict``.
        model_path: Location of the saved CatBoost model; must be hashable
            (for example a ``str``) because it is used as the cache key.

    Returns:
        The value returned by ``CatBoostClassifier.predict``.

    Raises:
        Whatever ``load_model`` or ``predict`` raise, e.g. when the model
        file is missing or the features do not match the model.
    """
    model = _CATBOOST_MODEL_CACHE.get(model_path)
    if model is None:
        model = CatBoostClassifier()
        model.load_model(model_path)
        # Store only after a successful load so a failed load is retried on
        # the next call. Two concurrent first calls may both load the model;
        # that is redundant work, not an error.
        _CATBOOST_MODEL_CACHE[model_path] = model
    return model.predict(features_df)
# Flask App Setup
app = Flask(__name__)


# Without this decorator the view is never registered and "/" returns 404.
# NOTE: async views require Flask to be installed with its async extra.
@app.route("/", methods=["GET", "POST"])
async def index():
    """Render the form page and, on POST, classify the submitted URL.

    On GET the template is rendered with no result. On POST the ``url`` form
    field is read, its features are extracted, and the CatBoost model file
    ``catboost_model.bin`` in the current working directory is used to label
    the URL as "Malicious" (prediction equal to 1) or "Legitimate".

    Returns:
        The rendered ``index.html`` template with ``result`` (label or error
        text, ``None`` on GET) and ``url_features`` (the extracted features,
        ``None`` unless classification succeeded).
    """
    result = None
    url_features = None
    if request.method == "POST":
        # A missing or blank field is reported on the page like any other
        # processing error instead of aborting the request.
        url = request.form.get("url", "").strip()
        if not url:
            result = "Error processing URL: no URL provided"
        else:
            try:
                # Extract features without blocking the event loop
                features = await async_extract_features(url)
                # Single-row frame: the model predicts on tabular input
                features_df = pd.DataFrame([features])
                model_path = os.path.join(os.getcwd(), "catboost_model.bin")
                predictions = predict_with_catboost(features_df, model_path)
                result = "Malicious" if predictions[0] == 1 else "Legitimate"
                # Expose the extracted features to the template
                url_features = features
            except Exception as e:
                # Request boundary: keep the page usable, but record the
                # traceback so the failure is not lost.
                app.logger.exception("Failed to process URL %r", url)
                result = f"Error processing URL: {str(e)}"
    return render_template("index.html", result=result, url_features=url_features)
# Start Flask's built-in server only when this file is executed directly:
# listen on all interfaces, port 7860, with debug mode off.
if __name__ == "__main__":
    app.run(
        host="0.0.0.0",
        port=7860,
        debug=False,
    )
''' | |
import asyncio | |
import pandas as pd | |
from concurrent.futures import ThreadPoolExecutor | |
from flask import Flask, request, render_template | |
from catboost import CatBoostClassifier | |
from url_process import extract_url_features, predict_urls # Import necessary functions | |
# Flask App Setup | |
app = Flask(__name__) | |
# Batch Processing: Ensures URLs are processed in manageable chunks | |
def process_urls_in_batches(urls, batch_size=10): | |
for i in range(0, len(urls), batch_size): | |
yield urls[i:i + batch_size] | |
# Async function for non-blocking DNS lookups and HTTP requests | |
async def async_extract_features(url): | |
features = await asyncio.to_thread(extract_url_features, url) | |
return features | |
# ThreadPoolExecutor for CPU-bound tasks like feature extraction | |
def extract_features_in_parallel(urls): | |
with ThreadPoolExecutor(max_workers=5) as executor: | |
return list(executor.map(extract_url_features, urls)) | |
# Load the CatBoost model for inference | |
def predict_with_catboost(features_df, model_path): | |
model = CatBoostClassifier() | |
model.load_model(model_path) | |
predictions = model.predict(features_df) | |
return predictions | |
@app.route("/", methods=["GET", "POST"]) | |
async def index(): | |
result = None | |
url_features = None | |
if request.method == "POST": | |
# Get the URL input from the form | |
url = request.form["url"] | |
try: | |
# Asynchronously process the URL features | |
features = await async_extract_features(url) | |
# Convert the features to a DataFrame for further processing | |
features_df = pd.DataFrame([features]) | |
# Perform prediction using the CatBoost model | |
model_path = "F:\\pyro guard\\model\\catboost_model.bin" # Specify your CatBoost model path | |
predictions = predict_with_catboost(features_df, model_path) | |
# Determine if the URL is malicious or legitimate | |
if predictions[0] == 1: | |
result = "Malicious" | |
else: | |
result = "Legitimate" | |
except Exception as e: | |
result = f"Error processing URL: {str(e)}" | |
return render_template("index.html", result=result, url_features=url_features) | |
if __name__ == "__main__": | |
app.run(debug=True) | |
''' |