#!/usr/bin/env python3
"""
Upload the instruction classifier model to Hugging Face Model Hub
"""

import os
import sys

from huggingface_hub import HfApi, login

# Replace with your username and repository name
DEFAULT_REPO_ID = "ddas/instruction-classifier-model"  # CHANGE THIS!

# Local checkpoint that gets uploaded.
DEFAULT_MODEL_PATH = "models/best_instruction_classifier.pth"

# Name of the checkpoint inside the Hub repository. The generated README
# refers to this exact filename, so it stays fixed regardless of the local path.
MODEL_FILENAME = "best_instruction_classifier.pth"


def _build_readme(repo_id):
    """Return the Markdown model card text for the repository *repo_id*."""
    return f"""# Instruction Classifier Model

This model is trained to detect instruction-like tokens in text for prompt injection defense.

## Model Details

- Architecture: XLM-RoBERTa base with classification head
- Task: Token classification (instruction vs. other)
- Training: Sliding window approach with diverse datasets
- Size: ~1GB
- Parameters: ~278M

## Usage

```python
from huggingface_hub import hf_hub_download
import torch
from transformers import AutoTokenizer

# You'll need the TransformerInstructionClassifier class from utils.py
# from utils import TransformerInstructionClassifier

# Download model file (returns path, not model object)
model_path = hf_hub_download(
    repo_id="{repo_id}",
    filename="best_instruction_classifier.pth",
    token="your_hf_token_if_private"  # Only needed for private repos
)

# Create model instance
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TransformerInstructionClassifier(
    model_name='xlm-roberta-base',
    num_labels=2,
    dropout=0.1
)

# Load weights from downloaded file
checkpoint = torch.load(model_path, map_location=device)

# Filter out loss function weights if present
model_state_dict = {{}}
for key, value in checkpoint.items():
    if not key.startswith('loss_fct'):
        model_state_dict[key] = value

model.load_state_dict(model_state_dict, strict=False)
model.to(device)
model.eval()

print("✅ Model loaded successfully!")
```

## Direct Usage with Instruction Classifier

```python
from instruction_classifier import sanitize_tool_output

# This will automatically download and use the model
result = sanitize_tool_output("Your text to check for injections")
```

## License

[Specify your license here]
"""


def upload_model(
    repo_id=DEFAULT_REPO_ID,
    model_path=DEFAULT_MODEL_PATH,
    *,
    private=True,
    token=None,
):
    """Upload the classifier checkpoint and a README to the Hugging Face Hub.

    Creates the model repository if it does not exist, uploads the checkpoint
    as ``best_instruction_classifier.pth`` and then uploads a generated
    ``README.md``. Progress and errors are printed to stdout.

    Authentication: log in beforehand with ``huggingface-cli login``, pass
    *token*, or set the ``HUGGINGFACE_TOKEN`` environment variable.

    Args:
        repo_id: Target repository in ``"username/name"`` form.
        model_path: Path of the local checkpoint file to upload.
        private: Whether the repository is created as private.
        token: Access token. When omitted, ``HUGGINGFACE_TOKEN`` is used if
            set; otherwise the library's own credential lookup applies.

    Returns:
        ``True`` if every step succeeded, ``False`` otherwise. Errors are
        reported rather than raised.
    """
    # Check the checkpoint first so a typo in the path does not leave an
    # empty repository behind on the Hub.
    if not os.path.isfile(model_path):
        print(f"❌ Error uploading model: model file not found: {model_path}")
        return False

    # An empty string is treated as "not set" so HfApi falls back to its
    # default credential lookup.
    resolved_token = token or os.environ.get("HUGGINGFACE_TOKEN") or None
    api = HfApi(token=resolved_token)

    try:
        # Create repository if it doesn't exist
        api.create_repo(
            repo_id, repo_type="model", exist_ok=True, private=private
        )
        visibility = "Private" if private else "Public"
        print(f"✅ {visibility} repository {repo_id} created/verified")

        # Upload the model file
        api.upload_file(
            path_or_fileobj=model_path,
            path_in_repo=MODEL_FILENAME,
            repo_id=repo_id,
            repo_type="model",
        )
        print(f"✅ Model uploaded to {repo_id}")

        # Upload a README for the model
        api.upload_file(
            path_or_fileobj=_build_readme(repo_id).encode(),
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="model",
        )
        print("✅ README uploaded")
    except Exception as e:
        # Script-level boundary: report the failure with remediation hints
        # and signal it to the caller instead of propagating a traceback.
        print(f"❌ Error uploading model: {e}")
        print("\nMake sure to:")
        print("1. Run: huggingface-cli login")
        print("2. Update repo_id with your username")
        return False

    print(f"\n🎉 Model successfully uploaded to: https://huggingface.co/{repo_id}")
    print("\nUpdate your instruction_classifier.py with:")
    print(f'model_path = hf_hub_download(repo_id="{repo_id}", filename="best_instruction_classifier.pth")')
    return True


if __name__ == "__main__":
    # Non-zero exit status lets shells and CI detect a failed upload.
    sys.exit(0 if upload_model() else 1)