kebos-ai / app.py
HARISH20205's picture
no binary
8b6e40c
import asyncio
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from flask import Flask, request, render_template
from catboost import CatBoostClassifier # Import CatBoost
from url_process import extract_url_features # Ensure you have the appropriate feature extraction function
import os
def process_urls_in_batches(urls, batch_size=10):
    """Yield consecutive slices of *urls*, each holding at most *batch_size* items.

    Args:
        urls: A sliceable sequence (list, tuple, ...) of URLs.
        batch_size: Maximum number of items per batch; must be at least 1.

    Yields:
        Slices of ``urls`` in their original order; the last one may be shorter.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every URL,
    # while a zero step fails with an unrelated-looking range() error.
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size!r}"
        )
    for start in range(0, len(urls), batch_size):
        yield urls[start:start + batch_size]
async def async_extract_features(url):
    """Compute the features of *url* without blocking the event loop.

    ``extract_url_features`` is a synchronous call, so it is handed to
    ``asyncio.to_thread`` and awaited; its return value is passed through
    unchanged.
    """
    return await asyncio.to_thread(extract_url_features, url)
def extract_features_in_parallel(urls, *, max_workers=5):
    """Extract features for every URL in *urls* using a pool of worker threads.

    Args:
        urls: Iterable of URLs, each passed to ``extract_url_features``.
        max_workers: Upper bound on concurrently running worker threads.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        A list with one result per URL, in the same order as ``urls``.

    Raises:
        Whatever ``extract_url_features`` raises for a URL; ``executor.map``
        re-raises it when that result is consumed.
    """
    # Threads only overlap time spent waiting (the GIL serialises pure-Python
    # CPU work), so this helps to the extent the extractor blocks on I/O.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(extract_url_features, urls))
# Loaded models, keyed by absolute path. Each value is a
# ``((mtime_ns, size), model)`` pair so that a replaced model file is reloaded
# while an unchanged one is deserialized only once per process.
_MODEL_CACHE = {}


def _load_catboost_model(model_path):
    """Return a CatBoostClassifier for *model_path*, reusing a cached instance.

    Raises:
        OSError: If the file cannot be stat'ed (e.g. ``FileNotFoundError``).
        Exception: Whatever ``CatBoostClassifier.load_model`` raises.
    """
    file_stat = os.stat(model_path)
    signature = (file_stat.st_mtime_ns, file_stat.st_size)
    cache_key = os.path.abspath(model_path)

    cached = _MODEL_CACHE.get(cache_key)
    if cached is not None and cached[0] == signature:
        return cached[1]

    print(f"Attempting to load model from: {model_path}")
    print(f"File size: {file_stat.st_size}")
    model = CatBoostClassifier()
    model.load_model(model_path)
    # Two concurrent first calls may both load the file; the later assignment
    # simply wins, which is harmless.
    # NOTE(review): the cached model is shared by every caller in the process;
    # confirm concurrent predict() calls are acceptable for this deployment.
    _MODEL_CACHE[cache_key] = (signature, model)
    return model


def predict_with_catboost(features_df, model_path):
    """Run the CatBoost model stored at *model_path* on *features_df*.

    The model used to be read from disk on every call; it is now loaded once
    and reused until the file's modification time or size changes.

    Args:
        features_df: Feature rows to classify, passed straight to
            ``CatBoostClassifier.predict``.
        model_path: Path of the serialized CatBoost model file.

    Returns:
        Whatever ``CatBoostClassifier.predict`` returns for ``features_df``.

    Raises:
        Exception: Any error from loading the model or from prediction is
            printed and then re-raised unchanged.
    """
    try:
        model = _load_catboost_model(model_path)
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        raise

    # Reported separately so a prediction failure is not mislabelled as a
    # model-loading problem.
    try:
        return model.predict(features_df)
    except Exception as e:
        print(f"Error running prediction: {str(e)}")
        raise
# Flask application object; the route handlers below are registered on it and
# the __main__ guard at the bottom of the file runs it.
app = Flask(__name__)
def _find_model_path():
    """Return the first existing CatBoost model file among the known locations.

    Raises:
        FileNotFoundError: If none of the candidate files exists.
    """
    candidates = (
        # A bare relative "catboost_model.bin" resolves to this same file, so
        # no separate relative entry is needed.
        os.path.join(os.getcwd(), "catboost_model.bin"),
        "/app/catboost_model.bin",  # Docker container path
        # Lowest priority: next to the application module, for servers that
        # are started from a different working directory.
        os.path.join(app.root_path, "catboost_model.bin"),
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    raise FileNotFoundError("Model file not found in any expected location")


@app.route("/", methods=["GET", "POST"])
async def index():
    """Render the URL form and, on POST, classify the submitted URL.

    On GET the template is rendered with no result. On POST the ``url`` form
    field is read, its features are extracted, and the CatBoost prediction is
    mapped to "Malicious" (label 1) or "Legitimate" (anything else).

    Returns:
        The rendered ``index.html`` template with ``result`` (verdict or error
        text, ``None`` on GET) and ``url_features`` (the extracted features,
        set only when classification succeeded).
    """
    result = None
    url_features = None

    if request.method == "POST":
        # A missing "url" field still raises here and yields Flask's usual
        # 400 response; only surrounding whitespace is trimmed.
        url = request.form["url"].strip()

        if not url:
            # Nothing to classify: report it instead of running feature
            # extraction on an empty string.
            result = "Error processing URL: no URL was provided"
        else:
            # Top-level boundary: any failure is shown to the user rather
            # than turned into a 500 page.
            try:
                features = await async_extract_features(url)
                # The model expects a table; wrap the single record as one row.
                features_df = pd.DataFrame([features])
                predictions = predict_with_catboost(
                    features_df, _find_model_path()
                )
                result = "Malicious" if predictions[0] == 1 else "Legitimate"
                # Expose the features only once classification succeeded.
                url_features = features
            except Exception as e:
                result = f"Error processing URL: {str(e)}"

    return render_template("index.html", result=result, url_features=url_features)
if __name__ == "__main__":
    # Script entry point: serve on every network interface, port 7860, with
    # the Flask debugger switched off.
    app.run(
        host="0.0.0.0",
        port=7860,
        debug=False,
    )