HARISH20205 committed
Commit f3352b5 · 0 Parent(s)
Files changed (16)
  1. .dockerignore +2 -0
  2. .gitattributes +35 -0
  3. .gitignore +2 -0
  4. .hfignore +2 -0
  5. Dockerfile +35 -0
  6. README.md +10 -0
  7. app.py +66 -0
  8. async_utils.py +11 -0
  9. catboost_model.bin +3 -0
  10. model_utils.py +0 -0
  11. requirements.txt +12 -0
  12. static/styles.css +79 -0
  13. sync_utils.py +7 -0
  14. templates/index.html +34 -0
  15. test.py +5 -0
  16. url_process.py +425 -0
.dockerignore ADDED
@@ -0,0 +1,2 @@
+ __pycache__
+ myenv/
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ __pycache__/
+ myenv/
.hfignore ADDED
@@ -0,0 +1,2 @@
+ __pycache__/
+ myenv/
Dockerfile ADDED
@@ -0,0 +1,35 @@
+ # Base image with Python
+ FROM python:3.11-slim
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
+ # Set working directory
+ WORKDIR /app
+
+ # Install system dependencies needed to build and run the Python packages
+ RUN apt-get update && apt-get install -y \
+     gcc \
+     libgomp1 \
+     && apt-get clean \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy the requirements file
+ COPY requirements.txt /app/
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir -r requirements.txt
+
+ # Copy the application code
+ COPY . /app/
+
+ # Expose the port
+ EXPOSE 5000
+
+ ENV FLASK_APP=app.py
+ ENV FLASK_RUN_HOST=0.0.0.0
+
+ # Start the application
+ CMD ["flask", "run", "-h", "0.0.0.0", "-p", "5000"]
README.md ADDED
@@ -0,0 +1,10 @@
+ ---
+ title: Kebos Ai
+ emoji: 🐠
+ colorFrom: purple
+ colorTo: green
+ sdk: docker
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,66 @@
+ import asyncio
+ import os
+
+ import pandas as pd
+ from concurrent.futures import ThreadPoolExecutor
+ from flask import Flask, request, render_template
+ from catboost import CatBoostClassifier
+ from url_process import extract_url_features  # Feature extraction for a single URL
+
+ # Batch processing: yields URLs in manageable chunks
+ def process_urls_in_batches(urls, batch_size=10):
+     for i in range(0, len(urls), batch_size):
+         yield urls[i:i + batch_size]
+
+ # Async wrapper so the blocking feature extraction (DNS lookups, HTTP requests)
+ # does not block the event loop
+ async def async_extract_features(url):
+     return await asyncio.to_thread(extract_url_features, url)
+
+ # ThreadPoolExecutor for extracting features from several URLs in parallel
+ def extract_features_in_parallel(urls):
+     with ThreadPoolExecutor(max_workers=5) as executor:
+         return list(executor.map(extract_url_features, urls))
+
+ # Load the CatBoost model and run inference
+ def predict_with_catboost(features_df, model_path):
+     model = CatBoostClassifier()
+     model.load_model(model_path)
+     return model.predict(features_df)
+
+ # Flask App Setup
+ app = Flask(__name__)
+
+ @app.route("/", methods=["GET", "POST"])
+ async def index():
+     result = None
+     url_features = None
+
+     if request.method == "POST":
+         # Get the URL input from the form
+         url = request.form["url"]
+
+         try:
+             # Asynchronously extract the URL features
+             features = await async_extract_features(url)
+
+             # Convert the features to a single-row DataFrame for the model
+             features_df = pd.DataFrame([features])
+
+             # Run the CatBoost prediction
+             model_path = os.path.join(os.getcwd(), "catboost_model.bin")
+             predictions = predict_with_catboost(features_df, model_path)
+
+             # Classify the URL as malicious or legitimate
+             result = "Malicious" if predictions[0] == 1 else "Legitimate"
+
+             # Optionally display the extracted features
+             url_features = features
+
+         except Exception as e:
+             result = f"Error processing URL: {e}"
+
+     return render_template("index.html", result=result, url_features=url_features)
+
+ if __name__ == "__main__":
+     app.run(debug=False, host="0.0.0.0")
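A quick way to exercise this route once the server is up — a minimal sketch; the localhost address and sample input are assumptions for illustration, not part of the commit:

# smoke_test.py — minimal sketch, assuming the Flask app is serving on localhost:5000
import requests

# Post through the same form field the template submits ("url")
response = requests.post("http://localhost:5000/", data={"url": "https://example.com"})
print(response.status_code)           # 200 if the route handled the request
print("Result:" in response.text)     # the rendered page embeds the verdict string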
async_utils.py ADDED
@@ -0,0 +1,11 @@
+ import asyncio
+ from url_process import extract_url_features
+
+ async def async_extract_features(url):
+     loop = asyncio.get_running_loop()  # get_event_loop() is deprecated inside coroutines
+     return await loop.run_in_executor(None, extract_url_features, url)
+
+ async def process_urls_async(urls):
+     tasks = [async_extract_features(url) for url in urls]
+     results = await asyncio.gather(*tasks)
+     return results
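A usage sketch for these helpers, assuming url_process.py is importable from the working directory; the example URLs are placeholders:

# run_async_utils.py — minimal sketch driving process_urls_async with asyncio.run
import asyncio
from async_utils import process_urls_async

urls = ["https://example.com", "https://example.org"]  # placeholder inputs
feature_dicts = asyncio.run(process_urls_async(urls))  # one feature dict per URL
print(len(feature_dicts))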
catboost_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e6186b650ea61418983e18e94fde8127cfb4983aa45c456d68cd2ef396d70e54
+ size 8151864
model_utils.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ requests
+ pandas
+ tld
+ dnspython
+ catboost
+ lime
+ google  # provides the googlesearch module used in url_process.py
+ python-whois
+ flask[async]==2.1.0
+ werkzeug==2.1.0
+ aiohttp
+ cryptography
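Flask only enables the async views used in app.py when the async extra is installed, which pulls in asgiref; a quick sanity check (sketch):

# check_async.py — verifies the flask[async] extra is present (sketch)
import asgiref  # an ImportError here means the async extra was not installed
print(asgiref.__version__)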
static/styles.css ADDED
@@ -0,0 +1,79 @@
+ body {
+     font-family: Verdana, Geneva, Tahoma, sans-serif;
+     margin: 0;
+     padding: 0;
+     background-color: #0a0a0a;
+     color: #333;
+ }
+
+ header {
+     background-color: #999999;
+     color: white;
+     padding: 20px;
+     text-align: center;
+ }
+
+ .container {
+     max-width: 500px;
+     margin: 30px auto;
+     padding: 20px;
+     background-color: #fff;
+     box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
+     border-radius: 10px;
+ }
+
+ .form-group {
+     margin-bottom: 15px;
+ }
+
+ label {
+     font-weight: bold;
+     display: block;
+     margin-bottom: 5px;
+ }
+
+ input[type="text"] {
+     width: 95%;
+     padding: 10px;
+     font-size: 16px;
+     border: 1px solid #000000;
+     border-radius: 5px;
+ }
+
+ button {
+     background-color: #000000;
+     color: white;
+     border: 2px solid #fff;
+     padding: 10px 20px;
+     cursor: pointer;
+     font-size: 16px;
+     border-radius: 5px;
+ }
+
+ button:hover {
+     background-color: #545454;
+ }
+
+ .result {
+     margin-top: 20px;
+     padding: 10px;
+     font-size: 18px;
+     text-align: center;
+     border-radius: 5px;
+ }
+
+ .result.legitimate {
+     background-color: #000000;
+     color: #ffffff;
+ }
+
+ .result.malicious {
+     background-color: #000000;
+     color: #ffffff;
+ }
+
+ footer {
+     text-align: center;
+     margin-top: 20px;
+     color: #777;
+ }
sync_utils.py ADDED
@@ -0,0 +1,7 @@
+ from concurrent.futures import ThreadPoolExecutor
+ from url_process import extract_url_features
+
+ def process_urls_sync(urls):
+     with ThreadPoolExecutor() as executor:
+         results = list(executor.map(extract_url_features, urls))
+     return results
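Equivalent synchronous usage — a sketch with placeholder URLs:

# run_sync_utils.py — minimal sketch for the thread-pool variant
from sync_utils import process_urls_sync

feature_dicts = process_urls_sync(["https://example.com", "https://example.org"])
print(len(feature_dicts))  # one feature dict per input URL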
templates/index.html ADDED
@@ -0,0 +1,34 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>URL Classification</title>
+     <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
+ </head>
+ <body>
+     <header>
+         <h1>KeBos AI - Intelligent URL Threat Analyser</h1>
+     </header>
+
+     <div class="container">
+         <form method="POST">
+             <div class="form-group">
+                 <label for="url">Enter URL:</label>
+                 <input type="text" id="url" name="url" placeholder="https://example.com" required value="{{ url if url else '' }}">
+             </div>
+             <button type="submit">Classify URL</button>
+         </form>
+
+         {% if result %}
+         <div class="result {% if result == 'Legitimate' %}legitimate{% else %}malicious{% endif %}">
+             Result: {{ result }}
+         </div>
+         {% endif %}
+     </div>
+
+     <!--<footer>
+         <p>&copy; 2024 URL Classification Tool</p>
+     </footer>-->
+ </body>
+ </html>
test.py ADDED
@@ -0,0 +1,5 @@
+ import os
+
+ model_path = os.path.join(os.getcwd(), "catboost_model.bin")
+
+ print(model_path)
url_process.py ADDED
@@ -0,0 +1,425 @@
+ from datetime import datetime
+ import re
+ import socket
+ import requests
+ import dns.resolver
+ import ssl
+ from urllib.parse import urlparse, parse_qs
+ import whois
+ from tld import get_tld
+ import pandas as pd
+ import time
+ from googlesearch import search
+ from catboost import CatBoostClassifier
+ from lime.lime_tabular import LimeTabularExplainer
+ import logging
+ import asyncio
+ import aiohttp
+
+ # Setup logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Feature extraction functions
+ def extract_url_features(url):
+     features = {}
+     try:
+         # Basic URL features
+         features['qty_dot_url'] = url.count('.')
+         features['qty_slash_url'] = url.count('/')
+         features['qty_at_url'] = url.count('@')
+         features['qty_space_url'] = url.count(' ')
+         features['qty_plus_url'] = url.count('+')
+         features['qty_dollar_url'] = url.count('$')
+         features['length_url'] = len(url)
+         features['qty_equal_url'] = url.count('=')
+         features['qty_asterisk_url'] = url.count('*')
+         features['qty_percent_url'] = url.count('%')
+         features['qty_exclamation_url'] = url.count('!')
+         features['qty_questionmark_url'] = url.count('?')
+         features['qty_tilde_url'] = url.count('~')
+         features['qty_hyphen_url'] = url.count('-')
+         features['qty_hashtag_url'] = url.count('#')
+         features['qty_underline_url'] = url.count('_')
+
+         tld_pattern = r'\.(com|org|net|gov|edu|io|xyz|info|biz|co|uk|us|ca|au|in|cn|jp|ru|de|fr|it|nl|es|ch|se|no|dk|fi|pl|tr|br|za|mx|kr|sg|my|th|hk|vn|ar|cl|pe|nz|il|pk)'
+         features['qty_tld_url'] = len(re.findall(tld_pattern, url, re.IGNORECASE))
+         features['email_in_url'] = 1 if '@' in url else 0
+         features['url_google_index'] = is_url_indexed(url)
+         features['url_shortened'] = 1 if 'bit.ly' in url or 'tinyurl.com' in url else 0
+         features['qty_comma_url'] = url.count(',')
+         features['qty_and_url'] = url.count('&')
+
+         # Extract domain from URL
+         domain = url.split('/')[2] if '://' in url else url.split('/')[0]
+
+         # Domain features
+         features['qty_underline_domain'] = domain.count('_')
+         features['qty_equal_domain'] = domain.count('=')
+         features['qty_exclamation_domain'] = domain.count('!')
+         features['qty_comma_domain'] = domain.count(',')
+         features['qty_hashtag_domain'] = domain.count('#')
+         features['qty_vowels_domain'] = sum(1 for c in domain if c in 'aeiouAEIOU')
+         features['server_client_domain'] = 1 if 'server' in domain or 'client' in domain else 0
+         features['qty_dot_domain'] = domain.count('.')
+         features['domain_in_ip'] = 1 if is_ip_address(domain) else 0
+         features['domain_length'] = len(domain)
+         features['qty_hyphen_domain'] = domain.count('-')
+         features['qty_percent_domain'] = domain.count('%')
+         features['qty_at_domain'] = domain.count('@')
+         features['domain_spf'] = get_spf_record(domain)
+         features['domain_google_index'] = is_domain_indexed(domain)
+
+         # Directory features
+         directory = url.split('/')[3] if len(url.split('/')) > 3 else ""
+         features['qty_underline_directory'] = directory.count('_')
+         features['qty_equal_directory'] = directory.count('=')
+         features['qty_exclamation_directory'] = directory.count('!')
+         features['qty_comma_directory'] = directory.count(',')
+         features['qty_hashtag_directory'] = directory.count('#')
+         features['directory_length'] = len(directory)
+         features['qty_space_directory'] = directory.count(' ')
+         features['qty_tilde_directory'] = directory.count('~')
+         features['qty_dollar_directory'] = directory.count('$')
+         features['qty_plus_directory'] = directory.count('+')
+         features['qty_and_directory'] = directory.count('&')
+         features['qty_slash_directory'] = directory.count('/')
+         features['qty_dot_directory'] = directory.count('.')
+         features['qty_asterisk_directory'] = directory.count('*')
+         features['qty_at_directory'] = directory.count('@')
+         features['qty_questionmark_directory'] = directory.count('?')
+         features['qty_hyphen_directory'] = directory.count('-')
+         features['qty_percent_directory'] = directory.count('%')
+
+         # File features
+         file = url.split('/')[4] if len(url.split('/')) > 4 else ""
+         features['qty_underline_file'] = file.count('_')
+         features['qty_and_file'] = file.count('&')
+         features['qty_dollar_file'] = file.count('$')
+         features['qty_questionmark_file'] = file.count('?')
+         features['qty_equal_file'] = file.count('=')
+         features['qty_slash_file'] = file.count('/')
+         features['qty_exclamation_file'] = file.count('!')
+         features['qty_comma_file'] = file.count(',')
+         features['qty_hashtag_file'] = file.count('#')
+         features['file_length'] = len(file)
+         features['qty_tilde_file'] = file.count('~')
+         features['qty_at_file'] = file.count('@')
+         features['qty_dot_file'] = file.count('.')
+         features['qty_space_file'] = file.count(' ')
+         features['qty_plus_file'] = file.count('+')
+         features['qty_asterisk_file'] = file.count('*')
+         features['qty_hyphen_file'] = file.count('-')
+         features['qty_percent_file'] = file.count('%')
+
+         # Parameters features
+         params = url.split('?')[1] if '?' in url else ""
+         features['qty_underline_params'] = params.count('_')
+         features['qty_equal_params'] = params.count('=')
+         features['qty_exclamation_params'] = params.count('!')
+         features['qty_comma_params'] = params.count(',')
+         features['qty_hashtag_params'] = params.count('#')
+         features['params_length'] = len(params)
+         features['qty_tilde_params'] = params.count('~')
+         features['qty_asterisk_params'] = params.count('*')
+         features['qty_space_params'] = params.count(' ')
+         features['qty_dollar_params'] = params.count('$')
+         features['qty_questionmark_params'] = params.count('?')
+         features['tld_present_params'] = 1 if get_tld(url, as_object=True, fail_silently=True) else 0  # fail silently so a bad TLD does not poison every feature
+         features['qty_plus_params'] = params.count('+')
+         features['qty_at_params'] = params.count('@')
+         features['qty_params'] = url.count('?')
+         features['qty_and_params'] = params.count('&')
+         features['qty_hyphen_params'] = params.count('-')
+         features['qty_dot_params'] = params.count('.')
+         features['qty_percent_params'] = params.count('%')
+         features['qty_slash_params'] = params.count('/')
+
+         # Other features
+         features['asn_ip'] = get_asn(get_ip_from_url(url))
+         features['qty_ip_resolved'] = get_resolved_ips(domain)
+         features['ttl_hostname'] = get_ttl(domain)
+
+         # Extract domain time features and ensure timestamps
+         features['time_domain_activation'], features['time_domain_expiration'] = get_domain_time_features(domain)
+
+         # Convert activation time to a timestamp if it's a datetime object
+         if isinstance(features['time_domain_activation'], datetime):
+             features['time_domain_activation'] = features['time_domain_activation'].timestamp()
+
+         # Convert expiration time to a timestamp if it's a datetime object
+         if isinstance(features['time_domain_expiration'], datetime):
+             features['time_domain_expiration'] = features['time_domain_expiration'].timestamp()
+
+         try:
+             features['qty_redirects'] = len(requests.get(url, timeout=5).history)
+         except requests.exceptions.RequestException as e:
+             logger.warning(f"Error processing redirects for URL '{url}': {e}")
+             features['qty_redirects'] = -1
+
+         features['qty_mx_servers'] = get_mx_record_count(domain)
+         features['qty_nameservers'] = get_nameserver_count(domain)
+         features['tls_ssl_certificate'] = get_tls_ssl_certificate(domain)
+         features['time_response'] = get_response_time(url)
+
+     except Exception as e:
+         logger.warning(f"Error extracting features for {url}: {e}")
+         for key in features.keys():
+             features[key] = -1
+
+     return features
+
+ # Function to count specific characters in a URL
+ def count_char_in_url(url, char):
+     try:
+         return url.count(char)
+     except Exception:
+         return -1  # Return -1 if unable to count characters
+
+ # Function to extract domain features from a URL
+ def extract_domain_features(url):
+     try:
+         domain = urlparse(url).netloc
+         domain_parts = domain.split('.')
+         tld_length = len(domain_parts[-1])  # Top-level domain length
+         return domain, tld_length
+     except Exception:
+         return -1, -1  # Return -1 if extraction fails
+
+ def is_domain_indexed(domain):
+     query = f"site:{domain}"
+     try:
+         results = list(search(query, num=1))
+         return 1 if results else 0
+     except Exception as e:
+         logger.warning(f"Error checking Google index for {domain}: {e}")
+         return -1
+
+ def get_response_time(url):
+     try:
+         start_time = time.time()
+         response = requests.get(url, timeout=10)  # 10-second timeout
+         end_time = time.time()
+         return end_time - start_time  # Response time in seconds
+     except requests.exceptions.RequestException as e:
+         logger.warning(f"Error measuring response time for {url}: {e}")
+         return -1  # Return -1 if there's an error
+
+ def get_mx_record_count(domain):
+     try:
+         # Use dns.resolver.resolve instead of the deprecated query
+         answers = dns.resolver.resolve(domain, 'MX')
+         return len(answers)
+     except dns.resolver.NoAnswer:
+         # No MX records found for the domain
+         logger.info(f"No MX records found for {domain}.")
+         return 0
+     except (dns.resolver.NXDOMAIN, dns.resolver.Timeout, dns.resolver.NoNameservers) as e:
+         # Handle other DNS errors
+         logger.warning(f"Error fetching MX records for {domain}: {e}")
+         return -1
+
+ def get_nameserver_count(domain):
+     try:
+         # Query the NS records for the domain
+         ns_records = dns.resolver.resolve(domain, 'NS')
+         return len(ns_records)  # Return the count of NS records
+     except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.Timeout):
+         # Handle cases where no NS records are found or the DNS query fails
+         logger.info(f"No NS records found for {domain}.")
+         return 0  # Return 0 if no NS records exist
+
+ # Function to extract directory- and file-related features
+ def extract_path_features(url):
+     try:
+         parsed_url = urlparse(url)
+         path = parsed_url.path
+         return path
+     except Exception:
+         return -1  # Return -1 if extraction fails
+
+ # Function to extract query-parameter-related features
+ def extract_query_features(url):
+     try:
+         parsed_url = urlparse(url)
+         query_params = parse_qs(parsed_url.query)
+         return query_params
+     except Exception:
+         return -1  # Return -1 if extraction fails
+
+ # Function to check if the domain is in IP-address format
+ def is_ip_address(domain):
+     try:
+         socket.inet_aton(domain)
+         return 1  # It's an IP address
+     except socket.error:
+         return 0  # It's not an IP address
+
+ # Function to get the time-related features
+ def get_domain_time_features(domain):
+     try:
+         domain_info = whois.whois(domain)
+         activation_time = domain_info.creation_date
+         expiration_time = domain_info.expiration_date
+
+         # Ensure activation_time and expiration_time are valid datetime objects or None
+         if not isinstance(activation_time, (datetime, type(None))):
+             activation_time = None
+         if not isinstance(expiration_time, (datetime, type(None))):
+             expiration_time = None
+
+         return activation_time, expiration_time
+     except Exception as e:
+         logger.warning(f"Error fetching domain times for {domain}: {e}")
+         return -1, -1
+
+ '''def get_domain_time_features(domain):
+     rdap_url = f"https://rdap.org/domain/{domain}"  # RDAP public API endpoint
+     try:
+         response = requests.get(rdap_url, timeout=5)
+
+         if response.status_code == 200:
+             domain_data = response.json()
+
+             # Extract activation and expiration dates
+             events = domain_data.get("events", [{}])
+             creation_date = None
+             expiration_date = None
+
+             for event in events:
+                 if event.get("eventAction") == "registration":
+                     creation_date = event.get("eventDate")
+                 elif event.get("eventAction") == "expiration":
+                     expiration_date = event.get("eventDate")
+
+             # Convert string dates to datetime objects
+             creation_date = datetime.fromisoformat(creation_date) if creation_date else 0
+             expiration_date = datetime.fromisoformat(expiration_date) if expiration_date else 0
+
+             return creation_date, expiration_date
+         elif response.status_code == 404:
+             # Domain does not exist
+             return 0, 0
+         else:
+             # Failed to fetch data for other reasons
+             return -1, -1
+
+     except Exception as e:
+         print(f"Error fetching domain times for {domain}: {e}")
+         return -1, -1'''
+
+ # Function to get SPF record
+ def get_spf_record(domain):
+     try:
+         txt_records = dns.resolver.resolve(domain, 'TXT')
+         for record in txt_records:
+             if "v=spf1" in str(record):
+                 return 1
+         return 0
+     except Exception:
+         return -1  # Return -1 if SPF check fails
+
+ # Function to check for a TLS/SSL certificate
+ def get_tls_ssl_certificate(domain):
+     try:
+         context = ssl.create_default_context()
+         with socket.create_connection((domain, 443)) as sock:
+             with context.wrap_socket(sock, server_hostname=domain) as ssock:
+                 cert = ssock.getpeercert()
+                 return 1  # TLS/SSL certificate exists
+     except Exception:
+         return -1  # Return -1 if SSL check fails
+
+ # Function to get IP address from URL
+ def get_ip_from_url(url):
+     try:
+         domain = url.split('/')[2] if '://' in url else url.split('/')[0]
+         ip = socket.gethostbyname(domain)
+         return ip
+     except Exception:
+         return -1  # Return -1 if IP extraction fails
+
+ # Function to get ASN for an IP
+ def get_asn(ip):
+     if not ip or ip == -1:
+         return -1  # Return -1 if IP is invalid
+     try:
+         response = requests.get(f"https://ipinfo.io/{ip}/json")
+         data = response.json()
+         org = data.get("org", "Unknown ASN")
+         match = re.search(r'AS(\d+)', org)
+         return int(match.group(1)) if match else -1
+     except Exception:
+         return -1
+
+ # Function to get resolved IPs for a domain
+ def get_resolved_ips(domain):
+     try:
+         return len(socket.gethostbyname_ex(domain)[2])
+     except Exception:
+         return -1
+
+ # Function to get the TTL value for a domain
+ def get_ttl(domain):
+     try:
+         answers = dns.resolver.resolve(domain, 'A')
+         return answers.rrset.ttl
+     except Exception:
+         return -1
+
+ def is_url_indexed(url):
+     query = f"site:{url}"
+     try:
+         results = list(search(query, num=1))
+         return 1 if results else 0
+     except Exception as e:
+         logger.warning(f"Error checking if URL is indexed: {e}")
+         return -1
+
+ def process_urls(urls):
+     url_features = []
+     for url in urls:
+         if not (url.startswith("http://") or url.startswith("https://")):
+             url = "https://" + url
+         features = extract_url_features(url)
+         url_features.append(features)
+     return pd.DataFrame(url_features)
+
+ '''def predict_urls(urls, model_path):
+     features_df = process_urls(urls)
+     features_df.fillna(-1, inplace=True)
+     model = CatBoostClassifier()
+     model.load_model(model_path)
+     predictions = model.predict(features_df)
+     return predictions'''
+
+ '''def explain_prediction(features_df, model_path):
+     model = CatBoostClassifier()
+     model.load_model(model_path)
+     explainer = LimeTabularExplainer(
+         training_data=features_df.values,
+         feature_names=features_df.columns.tolist(),
+         class_names=["Legitimate", "Malicious"],
+         mode="classification"
+     )
+     explanation = explainer.explain_instance(
+         data_row=features_df.iloc[0].values,
+         predict_fn=model.predict_proba,
+         num_features=5
+     )
+     explanation.show_in_notebook(show_table=True)
+     return explanation'''
+
+ # Async enhancements for faster processing (optional)
+ async def fetch_url(session, url):
+     try:
+         async with session.get(url, timeout=10) as response:
+             return len(response.history)
+     except Exception:
+         return -1
+
+ async def process_urls_async(urls):
+     async with aiohttp.ClientSession() as session:
+         tasks = [fetch_url(session, url) for url in urls]
+         results = await asyncio.gather(*tasks)
+         return results
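Putting the module together end to end, a hedged sketch of batch classification: the model path is an assumption based on app.py, and feature extraction makes live DNS/WHOIS/HTTP calls, so this needs network access:

# classify_batch.py — sketch combining process_urls with the CatBoost model,
# mirroring what app.py does per request
import os
from catboost import CatBoostClassifier
from url_process import process_urls

urls = ["example.com", "bit.ly/abc123"]   # placeholders; process_urls adds the scheme
features_df = process_urls(urls)          # network-bound feature extraction
features_df.fillna(-1, inplace=True)      # match the handling in the commented predict_urls

model = CatBoostClassifier()
model.load_model(os.path.join(os.getcwd(), "catboost_model.bin"))  # path assumed as in app.py
for url, label in zip(urls, model.predict(features_df)):
    print(url, "Malicious" if label == 1 else "Legitimate")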