ddas's picture
model hosted externally
82af392 unverified
raw
history blame
3.56 kB
#!/usr/bin/env python3
"""
Upload the instruction classifier model to the Hugging Face Model Hub.
"""
from huggingface_hub import HfApi, login
import os
def _render_model_card(repo_id: str) -> str:
    """Return the README.md text that accompanies the uploaded checkpoint.

    ``repo_id`` is interpolated into the download snippet so the example in
    the model card points at the repository that was actually uploaded to.
    """
    # NOTE: this is an f-string, so literal braces in the embedded sample
    # code are doubled ({{}}) to survive formatting.
    return f"""# Instruction Classifier Model

This model is trained to detect instruction-like tokens in text for prompt injection defense.

## Model Details

- Architecture: XLM-RoBERTa base with classification head
- Task: Token classification (instruction vs. other)
- Training: Sliding window approach with diverse datasets
- Size: ~1GB
- Parameters: ~278M

## Usage

```python
from huggingface_hub import hf_hub_download
import torch
from transformers import AutoTokenizer

# You'll need the TransformerInstructionClassifier class from utils.py
# from utils import TransformerInstructionClassifier

# Download model file (returns path, not model object)
model_path = hf_hub_download(
    repo_id="{repo_id}",
    filename="best_instruction_classifier.pth",
    token="your_hf_token_if_private"  # Only needed for private repos
)

# Create model instance
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TransformerInstructionClassifier(
    model_name='xlm-roberta-base',
    num_labels=2,
    dropout=0.1
)

# Load weights from downloaded file
checkpoint = torch.load(model_path, map_location=device)

# Filter out loss function weights if present
model_state_dict = {{}}
for key, value in checkpoint.items():
    if not key.startswith('loss_fct'):
        model_state_dict[key] = value

model.load_state_dict(model_state_dict, strict=False)
model.to(device)
model.eval()
print("✅ Model loaded successfully!")
```

## Direct Usage with Instruction Classifier

```python
from instruction_classifier import sanitize_tool_output

# This will automatically download and use the model
result = sanitize_tool_output("Your text to check for injections")
```

## License

[Specify your license here]
"""


def upload_model(
    repo_id: str = "ddas/instruction-classifier-model",
    model_path: str = "models/best_instruction_classifier.pth",
    *,
    private: bool = True,
) -> None:
    """Upload the classifier checkpoint and a README to the Hugging Face Hub.

    Authentication uses the ``HUGGINGFACE_TOKEN`` environment variable when it
    is set; otherwise ``HfApi`` falls back to its own credential lookup (for
    example a prior ``huggingface-cli login``).

    Args:
        repo_id: Target repository in ``"<user>/<name>"`` form. Change the
            default to your own namespace before running.
        model_path: Local path of the ``.pth`` checkpoint to upload.
        private: Whether the repository is created as private.

    Returns:
        None. Progress and failures are reported on stdout; upload errors are
        caught and printed rather than propagated.
    """
    # Fail fast, before any network call: otherwise the remote repository is
    # created first and the missing file only surfaces as a generic error
    # that misleadingly points at login problems.
    if not os.path.isfile(model_path):
        print(f"❌ Error uploading model: model file not found: {model_path}")
        return

    # The documented HUGGINGFACE_TOKEN variable is not one huggingface_hub
    # reads on its own, so pass it explicitly. An unset or empty value becomes
    # None, which leaves HfApi's default credential lookup in charge.
    token = os.environ.get("HUGGINGFACE_TOKEN") or None
    api = HfApi(token=token)

    # The name inside the repo stays fixed because the README snippet and
    # downstream download code refer to exactly this filename.
    filename_in_repo = "best_instruction_classifier.pth"
    visibility = "Private" if private else "Public"

    try:
        # Create the repository if it doesn't exist yet.
        api.create_repo(
            repo_id, repo_type="model", exist_ok=True, private=private
        )
        print(f"✅ {visibility} repository {repo_id} created/verified")

        # Upload the model file.
        api.upload_file(
            path_or_fileobj=model_path,
            path_in_repo=filename_in_repo,
            repo_id=repo_id,
            repo_type="model",
        )
        print(f"✅ Model uploaded to {repo_id}")

        # Upload a README for the model; upload_file accepts raw bytes.
        api.upload_file(
            path_or_fileobj=_render_model_card(repo_id).encode("utf-8"),
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="model",
        )
        print("✅ README uploaded")

        print(f"\n🎉 Model successfully uploaded to: https://huggingface.co/{repo_id}")
        print("\nUpdate your instruction_classifier.py with:")
        print(
            f'model_path = hf_hub_download(repo_id="{repo_id}", '
            f'filename="{filename_in_repo}")'
        )
    except Exception as e:
        # Deliberately broad: this is the script's top-level boundary and any
        # Hub/network failure should end in a readable hint, not a traceback.
        print(f"❌ Error uploading model: {e}")
        print("\nMake sure to:")
        print("1. Run: huggingface-cli login (or set HUGGINGFACE_TOKEN)")
        print("2. Update repo_id with your username")
if __name__ == "__main__":
    # Script entry point: run the upload only when executed directly,
    # not when this module is imported.
    upload_model()