import streamlit as st
from transformers import pipeline
import re

# Load the zero-shot classification model (loaded here but not used by the analysis below)
classifier = pipeline("zero-shot-classification", model="cross-encoder/nli-distilroberta-base")

# App title
st.title("URL Analysis App")

# Upload the text file
uploaded_file = st.file_uploader("Upload a text file containing URLs", type=["txt"])

if uploaded_file is not None:
    # Read the text file and keep one URL per non-empty line
    content = uploaded_file.read().decode("utf-8")
    urls = [line.strip() for line in content.splitlines() if line.strip()]

    # Lists for collecting the results
    parameters = []
    domains = []
    full_page_types = []
    file_extensions = []

    # URL analysis function
    def analyze_urls(urls):
        for url in urls:
            # Extract query parameter names with a regex
            params = re.findall(r'(\w+)=', url)
            parameters.extend(params)

            # Extract top-level domains (.com, .uk, .au): take the dot-separated
            # label followed by a path, port, query, fragment, or the end of the
            # URL, so subdomain labels such as "www" are skipped
            domain_match = re.search(r'\.([a-zA-Z]{2,})(?=[/:?#]|$)', url)
            if domain_match:
                domain = domain_match.group(1)
                if domain not in domains:
                    domains.append(domain)

            # Extract full page-type patterns (product_detail.php?, viewtopic.php?)
            page_type_match = re.search(r'(\w+\.[a-z]+)\?', url)
            if page_type_match:
                page_type = page_type_match.group(1)
                if page_type not in full_page_types:
                    full_page_types.append(page_type)

            # Extract file extensions (php, phtml, asp) without the question mark
            extension_match = re.search(r'(\w+\.[a-z]+)(\?|$)', url)
            if extension_match:
                extension = extension_match.group(1).split('.')[-1]
                if extension not in file_extensions:
                    file_extensions.append(extension)

    # Start button
    if st.button("Start"):
        # Analyze the URLs
        analyze_urls(urls)

        # Remove duplicates from the lists
        parameters = list(set(parameters))
        domains = list(set(domains))
        full_page_types = list(set(full_page_types))
        file_extensions = list(set(file_extensions))

        # Display the results.
        # Streamlit has no st.clipboard API, so the original "Copy" buttons would
        # raise AttributeError when clicked; download buttons are provided instead,
        # and the text areas can also be copied manually.
        st.header("Parameters")
        st.text_area("Copy the parameters here:", value="\n".join(parameters), height=200, key="parameters")
        st.download_button("Download Parameters", "\n".join(parameters), file_name="parameters.txt")

        st.header("Domains")
        st.text_area("Copy the domains here:", value="\n".join(domains), height=200, key="domains")
        st.download_button("Download Domains", "\n".join(domains), file_name="domains.txt")

        st.header("Full Page Types")
        st.text_area("Copy the full page types here:", value="\n".join(full_page_types), height=200, key="full_page_types")
        st.download_button("Download Full Page Types", "\n".join(full_page_types), file_name="full_page_types.txt")

        st.header("File Extensions")
        st.text_area("Copy the file extensions here:", value="\n".join(file_extensions), height=200, key="file_extensions")
        st.download_button("Download File Extensions", "\n".join(file_extensions), file_name="file_extensions.txt")
else:
    st.warning("Please upload a text file containing URLs to start analysis.")