Spaces:
Running
Running
File size: 5,229 Bytes
c57c848 f97e9a9 c57c848 e09232a f97e9a9 c57c848 f97e9a9 18e11fc f97e9a9 25dea08 db8f51d 25dea08 9be0260 0b2db5d 0fd14a7 0b2db5d cd3b61f 0b2db5d cd3b61f 25dea08 0fd14a7 25dea08 da1ebea 911b4eb db8f51d 18e11fc c57c848 5e5a50a 899a753 da1ebea fcf78e8 5b36832 d8fb0cb 020ecdf 5b36832 fcf78e8 899a753 da1ebea 5e5a50a fcf78e8 c57c848 020ecdf 25dea08 76a6ea9 25dea08 d89e652 76a6ea9 d89e652 020ecdf c57c848 0b2db5d c57c848 b5eddd2 fcf78e8 25dea08 c57c848 25dea08 c57c848 db8f51d c57c848 328a8f1 db8f51d da1ebea 9054596 c57c848 25dea08 da1ebea c57c848 25dea08 c57c848 25dea08 da1ebea 25dea08 da1ebea 25dea08 da1ebea 0b2db5d 911b4eb cd3b61f 911b4eb c57c848 911b4eb 0b2db5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Relative path of the fine-tuned classifier weights (a saved state dict).
model_path = "modernbert.bin"
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Tokenizer and architecture come from the base checkpoint; the 41-way
# classification head is then overwritten by the weights in model_path.
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
model = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
# Inference only: move to the target device and switch off dropout etc.
model.to(device).eval()
# Class index -> source label for the classifier's 41 outputs. The position
# in the list IS the class index, so the order must not be changed.
# classify_text treats every entry except 'human' (index 24) as an AI source.
label_mapping = dict(enumerate([
    '13B',
    '30B',
    '65B',
    '7B',
    'GLM130B',
    'bloom_7b',
    'bloomz',
    'cohere',
    'davinci',
    'dolly',
    'dolly-v2-12b',
    'flan_t5_base',
    'flan_t5_large',
    'flan_t5_small',
    'flan_t5_xl',
    'flan_t5_xxl',
    'gemma-7b-it',
    'gemma2-9b-it',
    'gpt-3.5-turbo',
    'gpt-35',
    'gpt4',
    'gpt4o',
    'gpt_j',
    'gpt_neox',
    'human',
    'llama3-70b',
    'llama3-8b',
    'mixtral-8x7b',
    'opt_1.3b',
    'opt_125m',
    'opt_13b',
    'opt_2.7b',
    'opt_30b',
    'opt_350m',
    'opt_6.7b',
    'opt_iml_30b',
    'opt_iml_max_1.3b',
    't0_11b',
    't0_3b',
    'text-davinci-002',
    'text-davinci-003',
]))
def classify_text(text):
    """Classify *text* as human-written or AI-generated and format the verdict.

    The model's softmax output is split into the probability of the 'human'
    class and the summed probability of all remaining (AI) classes; whichever
    is larger decides the verdict.

    Args:
        text: The text to analyse. ``None``, empty and whitespace-only input
            are all treated as "nothing to classify".

    Returns:
        A Markdown/HTML string: ``"----"`` for empty input, otherwise a
        message with the winning percentage and, for an AI verdict, the
        single most probable AI source from ``label_mapping``.
    """
    # Guard against None as well as blank input so the callback never raises
    # on a missing value.
    if not text or not text.strip():
        return "----"

    human_index = 24  # position of the 'human' class in label_mapping

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        # Single input, so take row 0 of the (1, num_labels) probability matrix.
        probabilities = torch.softmax(model(**inputs).logits, dim=1)[0]

    human_prob = probabilities[human_index].item() * 100

    # Zero out the human class on a copy so the sum and the argmax below
    # range over the AI classes only.
    ai_probs = probabilities.clone()
    ai_probs[human_index] = 0
    ai_total_prob = ai_probs.sum().item() * 100
    ai_argmax_index = torch.argmax(ai_probs).item()
    ai_argmax_model = label_mapping[ai_argmax_index]

    if human_prob > ai_total_prob:
        return (
            f"✅ - The text is <span class='highlight-human'>**{human_prob:.2f}%** likely <b>Human written</b>.</span>"
        )

    return (
        f"🤖 - The text is <span class='highlight-ai'>**{ai_total_prob:.2f}%** likely <b>AI generated</b>.</span>\n\n"
        f"**Identified AI Model:** {ai_argmax_model}"
    )
# Static UI copy; each of these strings is rendered through a gr.Markdown
# component when the page is assembled.
title = "AI Text Detector"
# NOTE(review): the bullet icons were mis-encoded in the source. The check
# mark is certain from the byte pattern; the magnifier and chart icons were
# chosen to match their labels - confirm against the intended design.
description = """
This tool uses the **ModernBERT** model to identify whether a given text was written by a human or generated by artificial intelligence (AI).
<br>
<div style="line-height: 1.8;">
✅ <b>Human Verification:</b> Human-written content is clearly marked.<br>
🔍 <b>Model Detection:</b> Can identify content from over 40 AI models.<br>
📈 <b>Accuracy:</b> Works best with longer texts for improved precision.
</div>
<br>
Paste your text below to analyze its origin.
"""
bottom_text = "**Developed by SzegedAI**"
# Stylesheet for the whole page. The #text_input_box, #result_output_box and
# #bottom_text selectors match the elem_id values given to the components,
# and .highlight-human / .highlight-ai match the span classes emitted by
# classify_text. The .svelte-* selectors target generated class names.
PAGE_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Roboto+Mono:wght@400;700&display=swap');
#text_input_box {
border-radius: 10px;
border: 2px solid #4CAF50;
font-size: 18px;
padding: 15px;
margin-bottom: 20px;
width: 60%;
box-sizing: border-box;
margin: auto;
}
.form.svelte-633qhp {
background: none;
border: none;
box-shadow: none;
}
#result_output_box {
border-radius: 10px;
border: 2px solid #4CAF50;
font-size: 18px;
padding: 15px;
margin-top: 20px;
width: 40%;
box-sizing: border-box;
text-align: center;
margin: auto;
}
@media (max-width: 768px) {
#result_output_box {
width: 100%;
}
#text_input_box{
width: 100%;
}
}
body {
font-family: 'Roboto Mono', sans-serif !important;
padding: 20px;
display: block;
justify-content: center;
align-items: center;
height: 100vh;
overflow-y: auto;
}
.gradio-container {
border: 1px solid #4CAF50;
border-radius: 15px;
padding: 30px;
box-shadow: 0px 0px 10px rgba(0,255,0,0.6);
max-width: 600px;
margin: auto;
overflow-y: auto;
}
h1 {
text-align: center;
font-size: 32px;
font-weight: bold;
margin-bottom: 30px;
}
.highlight-human {
color: #4CAF50;
font-weight: bold;
background: rgba(76, 175, 80, 0.2);
padding: 5px;
border-radius: 8px;
}
.highlight-ai {
color: #FF5733;
font-weight: bold;
background: rgba(255, 87, 51, 0.2);
padding: 5px;
border-radius: 8px;
}
#bottom_text {
text-align: center;
margin-top: 50px;
font-weight: bold;
font-size: 20px;
}
.block.svelte-11xb1hd{
background: none !important;
}
"""

# Top-level Blocks container; components are attached to it in the
# `with iface:` section that follows.
iface = gr.Blocks(css=PAGE_CSS)
# Build the page top to bottom: heading, intro copy, input box, result panel
# and footer. Components created inside the context attach to `iface`.
with iface:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)
    text_input = gr.Textbox(
        label="",
        placeholder="Type or paste your content here...",
        elem_id="text_input_box",
        lines=5,
    )
    result_output = gr.Markdown(
        "**Results will appear here...**",
        elem_id="result_output_box",
    )
    # Re-run the classifier whenever the textbox content changes and write
    # its returned Markdown into the result panel.
    text_input.change(
        classify_text,
        inputs=text_input,
        outputs=result_output,
    )
    gr.Markdown(bottom_text, elem_id="bottom_text")

# Start the web server; share=True additionally requests a public share link.
iface.launch(share=True)
|