Spaces:
Sleeping
Sleeping
File size: 4,874 Bytes
f3352b5 0f56e7e f3352b5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import asyncio
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from flask import Flask, request, render_template
from catboost import CatBoostClassifier # Import CatBoost
from url_process import extract_url_features # Ensure you have the appropriate feature extraction function
import os
# Batch Processing: Ensures URLs are processed in manageable chunks
def process_urls_in_batches(urls, batch_size: int = 10):
    """Yield consecutive slices of ``urls`` holding at most ``batch_size`` items.

    Args:
        urls: A sliceable sequence that supports ``len()`` (e.g. a list of URLs).
        batch_size: Maximum number of items per yielded batch; must be >= 1.

    Yields:
        Slices of ``urls`` in their original order; the last one may be shorter.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Because this is a
            generator function, the error surfaces on the first iteration.
    """
    if batch_size < 1:
        # A negative step makes range() empty and would silently drop every
        # URL; zero would fail with an unrelated-looking range() error.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    for start in range(0, len(urls), batch_size):
        yield urls[start:start + batch_size]
# Async function for non-blocking DNS lookups and HTTP requests
async def async_extract_features(url):
    """Run ``extract_url_features(url)`` in a worker thread and return its result.

    The call is handed to ``asyncio.to_thread``, so awaiting this coroutine
    does not block the running event loop while the extraction runs.
    """
    return await asyncio.to_thread(extract_url_features, url)
# ThreadPoolExecutor runs feature extraction for several URLs concurrently; threads suit the blocking DNS/HTTP lookups mentioned above rather than CPU-bound work
def extract_features_in_parallel(urls, max_workers=5):
    """Extract features for several URLs concurrently using a thread pool.

    Args:
        urls: Iterable of URLs; each one is passed to ``extract_url_features``.
        max_workers: Upper bound on worker threads. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        A list of ``extract_url_features`` results, in the same order as
        ``urls``. An empty input returns ``[]`` without creating a pool.
    """
    # Materialize once so generators work and emptiness can be checked.
    pending = list(urls)
    if not pending:
        return []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(extract_url_features, pending))
# Load the CatBoost model for inference
# Loaded models keyed by the path they were read from, so each model file is
# deserialized at most once per process instead of once per prediction call.
_CATBOOST_MODELS = {}


def _get_catboost_model(model_path):
    """Return the CatBoostClassifier for ``model_path``, loading it on first use."""
    model = _CATBOOST_MODELS.get(model_path)
    if model is None:
        model = CatBoostClassifier()
        model.load_model(model_path)
        # Store only after a successful load so a failed load is retried on
        # the next call rather than caching a half-initialized object.
        _CATBOOST_MODELS[model_path] = model
    return model


def predict_with_catboost(features_df, model_path):
    """Predict labels for ``features_df`` with the CatBoost model at ``model_path``.

    Args:
        features_df: Feature rows to classify, as passed to
            ``CatBoostClassifier.predict``.
        model_path: Path of the saved CatBoost model file. The model is loaded
            on the first call for a given path and reused afterwards.

    Returns:
        Whatever ``CatBoostClassifier.predict`` returns for ``features_df``.

    Raises:
        Any exception raised by ``load_model`` or ``predict`` is propagated.
    """
    return _get_catboost_model(model_path).predict(features_df)
# Flask App Setup
# Module-level application object: the route decorator and the __main__ guard
# further down both operate on this instance.
app = Flask(__name__)
@app.route("/", methods=["GET", "POST"])
async def index():
    """Render the URL-check page and, on POST, classify the submitted URL.

    GET renders ``index.html`` with no result. POST reads the ``url`` form
    field, extracts its features, runs the CatBoost model stored as
    ``catboost_model.bin`` in the current working directory, and renders the
    template with:

    * ``result`` -- "Malicious", "Legitimate", or an "Error processing URL: ..."
      message;
    * ``url_features`` -- the extracted features on success, otherwise ``None``.
    """
    result = None
    url_features = None
    if request.method == "POST":
        # .get() plus strip(): a missing or blank field becomes a message on
        # the page instead of a bare 400 response or an attempt to process an
        # empty URL.
        url = request.form.get("url", "").strip()
        if not url:
            result = "Error processing URL: no URL was provided"
        else:
            try:
                # Feature extraction runs in a worker thread (see
                # async_extract_features).
                features = await async_extract_features(url)
                # Single-row frame: one URL per request.
                features_df = pd.DataFrame([features])
                model_path = os.path.join(os.getcwd(), "catboost_model.bin")
                predictions = predict_with_catboost(features_df, model_path)
                # The code treats label 1 as the malicious class.
                result = "Malicious" if predictions[0] == 1 else "Legitimate"
                # Expose the features to the template only on success.
                url_features = features
            except Exception as e:
                # Top-level request boundary: keep the full traceback in the
                # server log and show the user a short message.
                app.logger.exception("Failed to process URL %r", url)
                result = f"Error processing URL: {str(e)}"
    return render_template("index.html", result=result, url_features=url_features)
if __name__ == "__main__":
    # Script entry point: listen on all network interfaces, port 7860, with
    # Flask's debug mode switched off.
    app.run(host="0.0.0.0", port=7860, debug=False)
# NOTE(review): everything between the triple quotes below is a bare string
# literal, not live code -- none of it is executed. It holds an older copy of
# this module that differs from the live code above by importing predict_urls,
# using a hard-coded Windows model path, never assigning url_features, and
# calling app.run(debug=True).
'''
import asyncio
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from flask import Flask, request, render_template
from catboost import CatBoostClassifier
from url_process import extract_url_features, predict_urls # Import necessary functions
# Flask App Setup
app = Flask(__name__)
# Batch Processing: Ensures URLs are processed in manageable chunks
def process_urls_in_batches(urls, batch_size=10):
for i in range(0, len(urls), batch_size):
yield urls[i:i + batch_size]
# Async function for non-blocking DNS lookups and HTTP requests
async def async_extract_features(url):
features = await asyncio.to_thread(extract_url_features, url)
return features
# ThreadPoolExecutor for CPU-bound tasks like feature extraction
def extract_features_in_parallel(urls):
with ThreadPoolExecutor(max_workers=5) as executor:
return list(executor.map(extract_url_features, urls))
# Load the CatBoost model for inference
def predict_with_catboost(features_df, model_path):
model = CatBoostClassifier()
model.load_model(model_path)
predictions = model.predict(features_df)
return predictions
@app.route("/", methods=["GET", "POST"])
async def index():
result = None
url_features = None
if request.method == "POST":
# Get the URL input from the form
url = request.form["url"]
try:
# Asynchronously process the URL features
features = await async_extract_features(url)
# Convert the features to a DataFrame for further processing
features_df = pd.DataFrame([features])
# Perform prediction using the CatBoost model
model_path = "F:\\pyro guard\\model\\catboost_model.bin" # Specify your CatBoost model path
predictions = predict_with_catboost(features_df, model_path)
# Determine if the URL is malicious or legitimate
if predictions[0] == 1:
result = "Malicious"
else:
result = "Legitimate"
except Exception as e:
result = f"Error processing URL: {str(e)}"
return render_template("index.html", result=result, url_features=url_features)
if __name__ == "__main__":
app.run(debug=True)
''' |