"""Gradio app that classifies cyber crime complaints.

A DistilBERT-based model scores each complaint against a fixed list of
categories and subcategories; the top predictions of each are shown in
two tables.
"""

import gradio as gr
import torch
import torch.nn as nn
from torch.serialization import add_safe_globals
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

# Inference settings.
MODEL_PATH = 'final_model.pt'
TOKENIZER_PATH = 'tokenizer'
MAX_LENGTH = 128     # token budget per complaint (longer text is truncated)
TOP_K = 3            # number of predictions shown per table
TEMPERATURE = 1.0    # softmax temperature; 1.0 leaves the logits unscaled


class HierarchicalLabelEncoder:
    """Map category / subcategory names to integer indices and back.

    Category indices follow the order of ``self.categories``.  Subcategory
    indices follow the alphabetical order of the de-duplicated union of all
    subcategory names, so a name shared by several categories has one index.
    """

    def __init__(self):
        self.categories = [
            "Any Other Cyber Crime",
            "Crime Against Women & Children",
            "Cryptocurrency Crime",
            "Cyber Attack/ Dependent Crimes",
            "Cyber Terrorism",
            "Hacking Damage to computercomputer system etc",
            "Online Cyber Trafficking",
            "Online Financial Fraud",
            "Online Gambling Betting",
            "Online and Social Media Related Crime",
            "Ransomware",
            "Report Unlawful Content",
        ]

        self.subcategories_map = {
            "Any Other Cyber Crime": ["Other"],
            "Crime Against Women & Children": [
                "Computer Generated CSAM/CSEM",
                "Cyber Blackmailing & Threatening",
                "Sexual Harassment",
            ],
            "Cryptocurrency Crime": ["Cryptocurrency Fraud"],
            "Cyber Attack/ Dependent Crimes": [
                "Data Breach/Theft",
                "Denial of Service (DoS)/Distributed Denial of Service (DDOS) attacks",
                "Hacking/Defacement",
                "Malware Attack",
                "Ransomware Attack",
                "SQL Injection",
                "Tampering with computer source documents",
            ],
            "Cyber Terrorism": ["Cyber Terrorism"],
            "Hacking Damage to computercomputer system etc": [
                "Damage to computer computer systems etc",
                "Email Hacking",
                "Tampering with computer source documents",
                "Unauthorised AccessData Breach",
                "Website DefacementHacking",
            ],
            "Online Cyber Trafficking": ["Online Trafficking"],
            "Online Financial Fraud": [
                "Business Email CompromiseEmail Takeover",
                "DebitCredit Card FraudSim Swap Fraud",
                "DematDepository Fraud",
                "EWallet Related Fraud",
                "Fraud CallVishing",
                "Internet Banking Related Fraud",
                "UPI Related Frauds",
            ],
            "Online Gambling Betting": ["Online Gambling Betting"],
            "Online and Social Media Related Crime": [
                "Cheating by Impersonation",
                "Cyber Bullying Stalking Sexting",
                "EMail Phishing",
                "FakeImpersonating Profile",
                "Impersonating Email",
                "Intimidating Email",
                "Online Job Fraud",
                "Online Matrimonial Fraud",
                "Profile Hacking Identity Theft",
                "Provocative Speech for unlawful acts",
            ],
            "Ransomware": ["Ransomware"],
            "Report Unlawful Content": ["Against Interest of sovereignty or integrity of India"],
        }

        self.category_to_idx = {cat: idx for idx, cat in enumerate(self.categories)}
        self.idx_to_category = {idx: cat for cat, idx in self.category_to_idx.items()}

        # Some subcategory names appear under more than one category; the set
        # removes the duplicates and sorting makes the index assignment stable.
        self.subcategories = sorted(
            {
                subcat
                for subcat_list in self.subcategories_map.values()
                for subcat in subcat_list
            }
        )

        self.subcategory_to_idx = {subcat: idx for idx, subcat in enumerate(self.subcategories)}
        self.idx_to_subcategory = {idx: subcat for subcat, idx in self.subcategory_to_idx.items()}

    def encode(self, category, subcategory):
        """Return ``(category_idx, subcategory_idx)`` for a valid pair.

        Raises:
            ValueError: if ``category`` is unknown or ``subcategory`` is not
                listed under that category.
        """
        if category not in self.subcategories_map or subcategory not in self.subcategories_map[category]:
            raise ValueError(f"Invalid category-subcategory pair: {category} - {subcategory}")
        return (self.category_to_idx[category], self.subcategory_to_idx[subcategory])

    def decode(self, category_idx, subcategory_idx):
        """Return ``(category, subcategory)`` names for the given indices.

        Raises:
            KeyError: if either index is not a known label index.
        """
        return (self.idx_to_category[category_idx], self.idx_to_subcategory[subcategory_idx])


class HinglishClassifier(nn.Module):
    """DistilBERT classifier with an extra head for subcategories.

    The wrapped ``DistilBertForSequenceClassification`` produces the category
    logits; a small MLP over the first-token vector of the last hidden layer
    produces the subcategory logits.
    """

    def __init__(self, num_categories, num_subcategories):
        super().__init__()
        self.base_model = DistilBertForSequenceClassification.from_pretrained(
            "distilbert-base-uncased",
            num_labels=num_categories
        )

        # The input width (768) has to match the hidden size of the encoder
        # loaded above.
        self.subcategory_classifier = nn.Sequential(
            nn.Linear(768, 512),
            nn.LayerNorm(512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, num_subcategories)
        )

    def forward(self, input_ids, attention_mask):
        """Return ``(category_logits, subcategory_logits)`` for a batch."""
        outputs = self.base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True
        )

        # Last hidden layer, first token position of every sequence.
        hidden_state = outputs.hidden_states[-1][:, 0]
        return outputs.logits, self.subcategory_classifier(hidden_state)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Lazily populated by load_model_components(); either all three are set or
# none of them is.
label_encoder = None
model = None
tokenizer = None


def _components_ready():
    """Return True when the encoder, model and tokenizer are all loaded."""
    return model is not None and tokenizer is not None and label_encoder is not None


def load_model_components():
    """Load the label encoder, model weights and tokenizer into the globals.

    Does nothing when everything is already loaded.  The globals are only
    assigned after every step has succeeded, so a failure part-way through
    leaves them untouched and the next call retries from scratch instead of
    running with a half-initialised state.

    Raises:
        Exception: whatever the checkpoint, model or tokenizer loading raised;
            the error is printed and then re-raised.
    """
    global label_encoder, model, tokenizer

    if _components_ready():
        return

    try:
        # NOTE(review): weights_only=False performs a full pickle load, which
        # can execute arbitrary code embedded in the file. Only use checkpoints
        # from a trusted source.
        checkpoint = torch.load(MODEL_PATH, map_location=device, weights_only=False)

        new_encoder = HierarchicalLabelEncoder()
        new_model = HinglishClassifier(
            num_categories=len(new_encoder.categories),
            num_subcategories=len(new_encoder.subcategories)
        )
        new_model.load_state_dict(checkpoint['model_state_dict'])
        new_model.to(device)
        new_model.eval()

        new_tokenizer = DistilBertTokenizer.from_pretrained(TOKENIZER_PATH)
    except Exception as e:
        print(f"Error loading model: {e}")
        raise

    # Publish only once everything above succeeded.
    label_encoder = new_encoder
    model = new_model
    tokenizer = new_tokenizer


def _top_k_rows(probs, idx_to_label, k):
    """Format the ``k`` most probable labels of a single-row batch.

    Returns a list of ``[label, "xx.xx%"]`` rows ordered from most to least
    probable.  ``k`` is clamped to the number of available classes.
    """
    k = min(k, probs.size(1))
    top = torch.topk(probs, k)
    return [
        [idx_to_label[idx.item()], f"{score.item():.2%}"]
        for score, idx in zip(top.values[0], top.indices[0])
    ]


def predict_category(text):
    """Classify one complaint and return the top predictions.

    Args:
        text: complaint text; ``None`` is treated as an empty string.

    Returns:
        A two-element list ``[categories_data, subcategories_data]`` where
        each element is a list of ``[label, confidence]`` rows, the confidence
        being formatted as a percentage string.
    """
    if not _components_ready():
        load_model_components()

    if text is None:
        text = ""

    inputs = tokenizer(
        text,
        truncation=True,
        padding='max_length',
        max_length=MAX_LENGTH,
        return_tensors='pt'
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        category_logits, subcategory_logits = model(
            inputs['input_ids'],
            inputs['attention_mask']
        )
        category_probs = torch.softmax(category_logits / TEMPERATURE, dim=1)
        subcategory_probs = torch.softmax(subcategory_logits / TEMPERATURE, dim=1)

    # Format results for DataFrame display
    categories_data = _top_k_rows(category_probs, label_encoder.idx_to_category, TOP_K)
    subcategories_data = _top_k_rows(subcategory_probs, label_encoder.idx_to_subcategory, TOP_K)

    return [categories_data, subcategories_data]


CUSTOM_CSS = """
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    h1 {
        text-align: center !important;
        font-size: 42px !important;
        font-weight: bold !important;
        margin-bottom: 16px !important;
    }
    p {
        text-align: left !important;
        font-size: 18px !important;
        margin-bottom: 24px !important;
    }
    /* Add textbox styling */
    .gradio-textbox textarea {
        font-size: 48px !important;
        line-height: 1.5 !important;
    }
    .primary-category label {
        font-size: 14px !important;
        font-weight: 600 !important;
    }
    .examples-container {
        background: #f7f7f7 !important;
        padding: 20px !important;
        border-radius: 8px !important;
        margin-top: 24px !important;
        border: 1px solid #ddd !important;
    }
    .examples-container h3 {
        font-size: 16px !important;
        margin-bottom: 12px !important;
    }
    .submit-row {
        display: flex !important;
        gap: 12px !important;
        margin-top: 16px !important;
    }
    .submit-btn, .clear-btn {
        flex: 1 !important;
        padding: 8px 16px !important;
    }
    """

# Sample complaints offered in the UI (English and Hinglish).
EXAMPLES = [
    ["Someone hacked my email account and sent spam to all my contacts."],
    ["I lost money through a UPI fraud transaction. Mera UPI froud reverse karva dijiye. Paytm customer care number se complaint bhi kiya hai."],
    ["Someone created fake Instagram profile with my daughter's photos and sending vulgar messages"],
    ["Ek ladke ne meri beti ko social media pe harass kiya aur uske private photos leak karne ki dhamki di"],
    ["Mere bank account se UPI fraud ke through 50,000 rupay nikal liye"],
    ["Koi mere naam se fake Facebook profile bana ke mere dosto ko paise ke liye message kar raha hai"],
    ["Kisi ne mere laptop ko hack karke usme se personal photos access kar liye hain. Ab wo photos viral karne ki dhamki dekar paise ki demand kar rahe hain. Ye bahut sensitive personal data breach hai"],
    ["Dating app pe match hone ke baad video call pe compromising situation record kar li aur ab blackmail kar rahe hain. Bahut badi amount ki demand kar rahe hain warna video viral karne ki dhamki de rahe hain."],
    ["Mere computer pe ransomware attack hua hai, sara data encrypt ho gaya hai"],
    ["Online cricket betting app pe paise lagaye, ab withdrawal nahi kar pa raha"],
    ["Maine cryptocurrency trading bot ke subscription ke paise pay kiye. Bot ne automatic trading karke sara balance loss kar diya aur ab company ka koi response nahi hai."],
    ["Maine online cricket betting app pe significant amount invest kiya tha. Initially small amounts ki withdrawal hui lekin ab bade amount ki withdrawal request process nahi ho rahi. App operators ka koi response nahi aa raha hai."],
]

iface = gr.Interface(
    title="Cyber Crime Complaint Classifier",
    description="This system classifies cyber crime complaints into categories and subcategories. Enter your complaint in either Hindi/English/Hinglish text to get started.",
    fn=predict_category,
    inputs=[
        gr.Textbox(
            lines=15,
            label="Enter your complaint text",
            placeholder="Type your cyber crime complaint here...",
            scale=6,
            show_copy_button=True
        )
    ],
    outputs=[
        gr.DataFrame(
            headers=["Categories", "Confidence"],
            row_count=3,
            col_count=2,
            interactive=False,
            label="Top 3 Categories"
        ),
        gr.DataFrame(
            headers=["Subcategories", "Confidence"],
            row_count=3,
            col_count=2,
            interactive=False,
            label="Top 3 Subcategories"
        )
    ],
    css=CUSTOM_CSS,
    examples=EXAMPLES
)

if __name__ == "__main__":
    # NOTE(review): safe globals are consulted by weights_only=True loads;
    # load_model_components() currently loads with weights_only=False.
    add_safe_globals([HierarchicalLabelEncoder])
    iface.launch(share=True)