Spaces:

HARISH20205
/

kebos-ai

Sleeping

File size: 4,874 Bytes

import asyncio
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from flask import Flask, request, render_template
from catboost import CatBoostClassifier  # Import CatBoost
from url_process import extract_url_features  # Ensure you have the appropriate feature extraction function
import os
# Batch Processing: Ensures URLs are processed in manageable chunks
def process_urls_in_batches(urls, batch_size=10):
    """Yield consecutive slices of ``urls`` holding at most ``batch_size`` items.

    Args:
        urls: A sliceable sequence (it must support ``len()`` and slicing).
        batch_size: Maximum number of items in each yielded chunk.

    Yields:
        Slices of ``urls`` in their original order; the last one may be
        shorter than ``batch_size``.
    """
    batch_starts = range(0, len(urls), batch_size)
    yield from (urls[start:start + batch_size] for start in batch_starts)

# Async function for non-blocking DNS lookups and HTTP requests
async def async_extract_features(url):
    """Await ``extract_url_features(url)`` executed in a worker thread.

    The call is handed to ``asyncio.to_thread`` so the awaiting coroutine
    does not block the event loop while the extraction runs.

    Args:
        url: The URL passed straight through to ``extract_url_features``.

    Returns:
        Whatever ``extract_url_features`` returns for ``url``.
    """
    pending_extraction = asyncio.to_thread(extract_url_features, url)
    return await pending_extraction

# Thread pool that runs feature extraction for several URLs concurrently (threads help most when the work is I/O-bound)
def extract_features_in_parallel(urls):
    """Run ``extract_url_features`` over ``urls`` on a pool of five threads.

    Args:
        urls: Iterable of URLs; each one is passed to ``extract_url_features``.

    Returns:
        A list of the extraction results, in the same order as ``urls``.
    """
    pool = ThreadPoolExecutor(max_workers=5)
    with pool:
        # Executor.map yields results in input order, not completion order.
        return [features for features in pool.map(extract_url_features, urls)]

# Load the CatBoost model for inference
# Loaded models keyed by path, so each model file is read from disk only once
# per process instead of once per prediction call.
_MODEL_CACHE = {}


def predict_with_catboost(features_df, model_path):
    """Return the CatBoost model's predictions for ``features_df``.

    The model stored at ``model_path`` is loaded on first use and then kept
    in memory, because deserializing it on every call adds avoidable latency
    to each request. As a consequence, changes to the file on disk are not
    picked up until the process restarts.

    Args:
        features_df: Feature rows to classify, passed to ``model.predict``.
        model_path: Path of the saved CatBoost model file; also the cache key.

    Returns:
        The value returned by ``CatBoostClassifier.predict`` for the rows.

    Raises:
        Whatever ``load_model`` or ``predict`` raise (e.g. for a missing or
        unreadable model file); such errors are not caught here.
    """
    model = _MODEL_CACHE.get(model_path)
    if model is None:
        model = CatBoostClassifier()
        model.load_model(model_path)
        # Cache only after a successful load so a failed load is retried
        # on the next call rather than leaving a half-initialized model.
        _MODEL_CACHE[model_path] = model
    return model.predict(features_df)

# Flask App Setup
# Module-level application object: the "/" route below is registered on it,
# and the __main__ guard at the end of the live code starts it.
app = Flask(__name__)

@app.route("/", methods=["GET", "POST"])
async def index():
    """Render the URL-checking page and, on POST, classify the submitted URL.

    A GET request renders ``index.html`` with no result. A POST request reads
    the ``url`` form field, extracts its features in a worker thread, runs the
    CatBoost model on them and renders the page with the verdict.

    Returns:
        The rendered ``index.html`` template. It receives ``result``
        ("Malicious", "Legitimate", an error message, or None) and
        ``url_features`` (the extracted features on success, otherwise None).
    """
    result = None
    url_features = None

    if request.method == "POST":
        # Surrounding whitespace is a common paste artifact and is never part
        # of the URL itself, so drop it before extracting features.
        url = request.form["url"].strip()

        if not url:
            # Reject blank input up front instead of running feature
            # extraction and the model on an empty string.
            result = "Error processing URL: no URL provided"
        else:
            try:
                # Feature extraction runs in a worker thread.
                features = await async_extract_features(url)

                # One-row frame: the model expects tabular input.
                features_df = pd.DataFrame([features])

                # The model file is looked up in the current working directory.
                model_path = os.path.join(os.getcwd(), "catboost_model.bin")
                predictions = predict_with_catboost(features_df, model_path)

                # Label 1 is reported as malicious; anything else as legitimate.
                result = "Malicious" if predictions[0] == 1 else "Legitimate"

                # Expose the extracted features to the template as well.
                url_features = features
            except Exception as e:
                # Request boundary: keep the page usable, but record the full
                # traceback so failures are diagnosable from the server log.
                app.logger.exception("Failed to classify URL %r", url)
                result = f"Error processing URL: {str(e)}"

    return render_template("index.html", result=result, url_features=url_features)

if __name__ == "__main__":
    # Serve on every network interface at port 7860 with the debugger off.
    app.run(host="0.0.0.0", port=7860, debug=False)

# NOTE: everything between the triple quotes below is a bare string expression,
# not executed code. It preserves an earlier revision of this app that used a
# hard-coded Windows model path, imported ``predict_urls`` and ran with
# debug=True. It has no effect at runtime.
'''
import asyncio
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from flask import Flask, request, render_template
from catboost import CatBoostClassifier
from url_process import extract_url_features, predict_urls  # Import necessary functions

# Flask App Setup
app = Flask(__name__)

# Batch Processing: Ensures URLs are processed in manageable chunks
def process_urls_in_batches(urls, batch_size=10):
    for i in range(0, len(urls), batch_size):
        yield urls[i:i + batch_size]

# Async function for non-blocking DNS lookups and HTTP requests
async def async_extract_features(url):
    features = await asyncio.to_thread(extract_url_features, url)
    return features

# ThreadPoolExecutor for CPU-bound tasks like feature extraction
def extract_features_in_parallel(urls):
    with ThreadPoolExecutor(max_workers=5) as executor:
        return list(executor.map(extract_url_features, urls))

# Load the CatBoost model for inference
def predict_with_catboost(features_df, model_path):
    model = CatBoostClassifier()
    model.load_model(model_path)
    predictions = model.predict(features_df)
    return predictions

@app.route("/", methods=["GET", "POST"])
async def index():
    result = None
    url_features = None

    if request.method == "POST":
        # Get the URL input from the form
        url = request.form["url"]

        try:
            # Asynchronously process the URL features
            features = await async_extract_features(url)

            # Convert the features to a DataFrame for further processing
            features_df = pd.DataFrame([features])

            # Perform prediction using the CatBoost model
            model_path = "F:\\pyro guard\\model\\catboost_model.bin"  # Specify your CatBoost model path
            predictions = predict_with_catboost(features_df, model_path)

            # Determine if the URL is malicious or legitimate
            if predictions[0] == 1:
                result = "Malicious"
            else:
                result = "Legitimate"

        except Exception as e:
            result = f"Error processing URL: {str(e)}"

    return render_template("index.html", result=result, url_features=url_features)

if __name__ == "__main__":
    app.run(debug=True)

'''