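# AI Text Detector (SzegedAI): a Gradio app that classifies input text as
# human-written or AI-generated using a fine-tuned ModernBERT sequence
# classifier with 41 labels (40 AI model families plus 'human').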
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
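# Load the ModernBERT-base tokenizer and backbone, then overwrite the backbone
# weights with the fine-tuned checkpoint shipped alongside the app.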
model_path = "modernbert.bin"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
model = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()
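# Class index -> label name. Index 24 ('human') means human-written text;
# every other index names the AI model presumed to have generated the text.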
label_mapping = {
    0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
    6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
    11: 'flan_t5_base', 12: 'flan_t5_large', 13: 'flan_t5_small',
    14: 'flan_t5_xl', 15: 'flan_t5_xxl', 16: 'gemma-7b-it', 17: 'gemma2-9b-it',
    18: 'gpt-3.5-turbo', 19: 'gpt-35', 20: 'gpt4', 21: 'gpt4o',
    22: 'gpt_j', 23: 'gpt_neox', 24: 'human', 25: 'llama3-70b', 26: 'llama3-8b',
    27: 'mixtral-8x7b', 28: 'opt_1.3b', 29: 'opt_125m', 30: 'opt_13b',
    31: 'opt_2.7b', 32: 'opt_30b', 33: 'opt_350m', 34: 'opt_6.7b',
    35: 'opt_iml_30b', 36: 'opt_iml_max_1.3b', 37: 't0_11b', 38: 't0_3b',
    39: 'text-davinci-002', 40: 'text-davinci-003'
}
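# Single-example inference: tokenize, run one forward pass, softmax over the
# 41 classes, then format the verdict as Markdown/HTML for the result panel.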
def classify_text(text):
    # Blank input: show a neutral placeholder instead of running the model.
    if not text.strip():
        return "----"
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    inputs = {key: value.to(device) for key, value in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)[0]
    predicted_class = torch.argmax(probabilities).item()
    confidence = probabilities[predicted_class].item() * 100
    if predicted_class == 24:  # index 24 is the 'human' label
        prediction_label = f"✅ - The text is <span class='highlight-human'>**{confidence:.2f}%** likely <b>Human written</b>.</span>"
        model_info = ""
    else:
        prediction_label = f"🤖 - The text is <span class='highlight-ai'>**{confidence:.2f}%** likely <b>AI generated</b>.</span>"
        model_info = f"**Identified AI Model:** {label_mapping[predicted_class]}"
    result_message = f"**Result:**\n\n{prediction_label}"
    if model_info:
        result_message += f"\n\n{model_info}"
    return result_message
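# Quick sanity check (requires 'modernbert.bin' on disk; the second input is
# an illustrative placeholder, not from any dataset):
#   classify_text("")                                  # -> "----"
#   classify_text("Large language models can write.")  # -> Markdown verdict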
title = "AI Text Detector"
description = """
This tool uses the **ModernBERT** model to identify whether a given text was written by a human or generated by AI.

- 🔍 **Model Detection:** Can identify content from over 40 AI models.
- ✅ **Human Verification:** Human-written content is clearly marked.
- 📈 **Accuracy:** Works best with longer texts for improved precision.

Paste your text below to analyze its origin.
"""
bottom_text = "**Developed by SzegedAI**"
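# Build the UI with gr.Blocks and custom CSS: a centered card layout, Roboto
# Mono type, and the green/red highlight classes used in the verdict HTML.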
iface = gr.Blocks(css="""
@import url('https://fonts.googleapis.com/css2?family=Roboto+Mono:wght@400;700&display=swap');

#text_input_box {
    border-radius: 10px;
    border: 2px solid #4CAF50;
    font-size: 18px;
    padding: 15px;
    width: 60%;
    box-sizing: border-box;
    margin: 0 auto 20px; /* center horizontally, keep space below */
}

#result_output_box {
    border-radius: 10px;
    border: 2px solid #4CAF50;
    font-size: 18px;
    padding: 15px;
    width: 40%;
    box-sizing: border-box;
    text-align: center;
    margin: 20px auto 0; /* center horizontally, keep space above */
}

@media (max-width: 768px) {
    #result_output_box {
        width: 80%;
    }
}

body {
    font-family: 'Roboto Mono', monospace;
    padding: 20px;
    display: block;
    height: 100vh;
    overflow-y: auto;
}

.gradio-container {
    border: 1px solid #4CAF50;
    border-radius: 15px;
    padding: 30px;
    box-shadow: 0px 0px 10px rgba(0, 255, 0, 0.6);
    max-width: 600px;
    margin: auto;
}

h1 {
    text-align: center;
    font-size: 32px;
    font-weight: bold;
    margin-bottom: 30px;
}

.highlight-human {
    color: #4CAF50;
    font-weight: bold;
    background: rgba(76, 175, 80, 0.2);
    padding: 5px;
    border-radius: 8px;
}

.highlight-ai {
    color: #FF5733;
    font-weight: bold;
    background: rgba(255, 87, 51, 0.2);
    padding: 5px;
    border-radius: 8px;
}
""")
with iface:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)
    text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
    result_output = gr.Markdown("**Results will appear here...**", elem_id="result_output_box")
    # Re-classify on every edit so the verdict updates live as the user types.
    text_input.change(classify_text, inputs=text_input, outputs=result_output)
    gr.Markdown(bottom_text, elem_id="bottom_text")

iface.launch(share=True)