Spaces:
Sleeping
Sleeping
File size: 3,384 Bytes
ced22e1 1646c3f ced22e1 f69939b 4c32405 ced22e1 b08284c 1646c3f 95a596a b08284c 95a596a b08284c 95a596a 1646c3f b08284c 4e37c20 b08284c 4e37c20 a00ff89 b08284c 1646c3f b08284c 1646c3f b08284c 1646c3f b08284c 865820b b08284c 4e37c20 b08284c d4eaaae 902d69b b08284c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import streamlit as st
from transformers import pipeline
import re
from urllib.parse import urlparse


@st.cache_resource
def load_classifier():
    """Load the zero-shot classifier once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every interaction; without
    caching, the original code re-downloaded/re-initialized the model on
    each rerun.

    NOTE(review): the classifier is never actually used anywhere below —
    kept only to preserve the original app's startup behavior; confirm and
    delete if truly unused.
    """
    return pipeline(
        "zero-shot-classification",
        model="cross-encoder/nli-distilroberta-base",
    )


def analyze_urls(urls):
    """Extract query parameters, TLDs, page types and file extensions.

    Args:
        urls: iterable of URL strings (one URL each).

    Returns:
        A tuple of four sorted, de-duplicated lists:
        (parameters, domains, full_page_types, file_extensions).
    """
    parameters = set()
    domains = set()
    full_page_types = set()
    file_extensions = set()

    for url in urls:
        # Query-parameter names, e.g. "id" and "cat" from "?id=3&cat=7".
        parameters.update(re.findall(r'(\w+)=', url))

        # Top-level domain (.com, .uk, .au): take the LAST dot-separated
        # label of the hostname. The original regex r'\.([a-zA-Z]+)' matched
        # the first dot in the raw URL, yielding e.g. "example" for
        # "www.example.com" instead of "com". Scheme-less URLs are prefixed
        # with "//" so urlparse treats the leading text as a netloc.
        host = urlparse(url if "://" in url else "//" + url).hostname or ""
        if "." in host:
            tld = host.rsplit(".", 1)[1]
            if tld.isalpha():
                domains.add(tld)

        # Full page type: a "name.ext" immediately followed by "?",
        # e.g. "product_detail.php?id=1" -> "product_detail.php".
        page_type = re.search(r'(\w+\.[a-z]+)\?', url)
        if page_type:
            full_page_types.add(page_type.group(1))

        # File name with extension (php, phtml, asp, ...) at the end of the
        # URL or just before the query string. The group can never contain
        # "?", so the original's extra split('?') was dead code.
        extension = re.search(r'(\w+\.[a-z]+)(?:\?|$)', url)
        if extension:
            file_extensions.add(extension.group(1))

    return (
        sorted(parameters),
        sorted(domains),
        sorted(full_page_types),
        sorted(file_extensions),
    )


classifier = load_classifier()

st.title("URL Analysis App")

uploaded_file = st.file_uploader("Upload a text file containing URLs", type=["txt"])

if uploaded_file is not None:
    # One URL per non-blank line.
    content = uploaded_file.read().decode("utf-8")
    urls = [line.strip() for line in content.splitlines() if line.strip()]

    if st.button("Start"):
        parameters, domains, full_page_types, file_extensions = analyze_urls(urls)

        # The original placed extra "Copy ..." buttons here that called
        # st.clipboard.copy — that attribute does not exist in the Streamlit
        # API, so every click raised AttributeError. Moreover, clicking any
        # button nested under `if st.button("Start")` triggers a rerun in
        # which the Start button reads False, hiding all results. The text
        # areas below are selectable/copyable, so the buttons are removed.
        st.header("Parameters")
        st.text_area("Copy the parameters here:", value="\n".join(parameters), height=200, key="parameters")

        st.header("Domains")
        st.text_area("Copy the domains here:", value="\n".join(domains), height=200, key="domains")

        st.header("Full PageType")
        st.text_area("Copy the full page types here:", value="\n".join(full_page_types), height=200, key="full_page_types")

        st.header("File Extensions")
        st.text_area("Copy the file extensions here:", value="\n".join(file_extensions), height=200, key="file_extensions")
else:
    st.warning("Please upload a text file containing URLs to start analysis.")