File size: 4,164 Bytes
c57c848
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18e11fc
c57c848
 
 
 
 
 
328a8f1
18e11fc
c57c848
5b36832
 
c57c848
5b36832
18e11fc
c57c848
18e11fc
 
 
 
 
 
5b36832
0fd14a7
18e11fc
c57c848
0fd14a7
 
 
 
 
328a8f1
5b36832
0fd14a7
c57c848
 
 
 
18e11fc
c57c848
 
 
 
5b36832
c57c848
 
 
328a8f1
c57c848
18e11fc
c57c848
 
 
328a8f1
 
 
 
 
5b36832
 
 
 
 
 
 
328a8f1
 
 
c57c848
 
 
 
 
 
328a8f1
c57c848
 
 
 
328a8f1
c57c848
328a8f1
 
 
 
c57c848
 
 
 
 
5b36832
c57c848
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# File holding the fine-tuned classifier weights; it is read with torch.load
# and applied as a state dict below.
model_path = "modernbert.bin"
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The tokenizer and the network architecture come from the base ModernBERT
# checkpoint; the classification weights are then overwritten from model_path.
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
# num_labels must stay equal to the number of entries in label_mapping (41).
model = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
# weights_only=True limits unpickling to tensors and plain containers, which is
# all a state dict needs, so a tampered checkpoint cannot execute arbitrary
# code while being loaded.
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
model.to(device)
# Inference only: switch off dropout and other training-time behaviour.
model.eval()

# Class index -> source name. classify_text looks up the predicted class index
# in this table; index 24 ('human') is the only entry that is not an AI model.
# The position of each name in the tuple is its class index, so the order
# must not be changed.
label_mapping = dict(enumerate((
    '13B',
    '30B',
    '65B',
    '7B',
    'GLM130B',
    'bloom_7b',
    'bloomz',
    'cohere',
    'davinci',
    'dolly',
    'dolly-v2-12b',
    'flan_t5_base',
    'flan_t5_large',
    'flan_t5_small',
    'flan_t5_xl',
    'flan_t5_xxl',
    'gemma-7b-it',
    'gemma2-9b-it',
    'gpt-3.5-turbo',
    'gpt-35',
    'gpt4',
    'gpt4o',
    'gpt_j',
    'gpt_neox',
    'human',
    'llama3-70b',
    'llama3-8b',
    'mixtral-8x7b',
    'opt_1.3b',
    'opt_125m',
    'opt_13b',
    'opt_2.7b',
    'opt_30b',
    'opt_350m',
    'opt_6.7b',
    'opt_iml_30b',
    'opt_iml_max_1.3b',
    't0_11b',
    't0_3b',
    'text-davinci-002',
    'text-davinci-003',
)))

# Index of the 'human' class in label_mapping and in the model's output logits.
HUMAN_CLASS_ID = 24


def classify_text(text):
    """Classify *text* as human-written or AI-generated and format the verdict.

    The model outputs one probability per entry of ``label_mapping``: a single
    'human' class and many AI-model classes. The verdict compares the human
    probability with the *combined* probability of all AI classes, so the
    percentage shown really is "how likely is this text AI generated" rather
    than the score of one individual AI model.

    Args:
        text: The text entered by the user. May be empty or ``None``.

    Returns:
        A Markdown string with the verdict and, for AI-generated text, the
        most probable AI model. An empty string when there is no text to
        analyse.
    """
    # The interface runs with live=True, so this is called on every edit,
    # including an empty box; do not show a verdict for nothing.
    if not text or not text.strip():
        return ""

    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=1)[0]

        human_probability = probabilities[HUMAN_CLASS_ID].item()
        # Zero out the human class so the remaining mass is "any AI model"
        # and argmax picks the most likely AI model specifically.
        ai_probabilities = probabilities.clone()
        ai_probabilities[HUMAN_CLASS_ID] = 0.0
        ai_probability = ai_probabilities.sum().item()

        if human_probability > ai_probability:
            confidence = human_probability * 100
            prediction_label = f"✅ - The text is **{confidence:.2f}%** likely **Human written**."
            model_info = ""
        else:
            confidence = ai_probability * 100
            top_ai_class = torch.argmax(ai_probabilities).item()
            prediction_label = f"🤖 - The text is **{confidence:.2f}%** likely **AI generated**."
            model_info = f"**Identified AI Model:** {label_mapping[top_ai_class]}"

    result_message = f"**Result:**\n\n{prediction_label}"
    if model_info:
        result_message += f"\n\n{model_info}"

    return result_message

# Page heading; passed as `title` to gr.Interface below.
title = "AI Text Detector"
# Markdown introduction; passed as `description` to gr.Interface below.
# This is user-facing text, so edits here change what visitors read.
description = """
**AI detection tool by SzegedAI**

Detect AI-generated texts with precision using the new **ModernBERT** model, fine-tuned for machine-generated text detection, and capable of identifying 40 different models.

- **🤖 Identify AI Models**: Reveals which LLM generated the text if detected as AI.
- **✅ Human Verification**: Marks confidently human-written text with a green checkmark.

**Note:** The longer the text, the better the detection accuracy.

"""

# Stylesheet for the page. The #text_input_box and #result_output_box
# selectors match the elem_id values given to the two components below.
_CUSTOM_CSS = """
    #text_input_box {
        border-radius: 10px;
        border: 2px solid #4CAF50;
        font-size: 18px;
        padding: 15px;
        text-align: center;
        margin: 0 auto;
        display: block;
        width: 80%;
    }
    #result_output_box {
        border-radius: 10px;
        border: 2px solid #4CAF50;
        font-size: 18px;
        padding: 15px;
        background-color: #2E2E3F;
        margin: 20px auto;
        width: 80%;
        text-align: center;
    }
    body {
        background: #1E1E2F;
        color: #E1E1E6;
        font-family: 'Aptos', sans-serif;
        padding: 20px;
        text-align: center;
    }
    .gradio-container {
        border: 2px solid #4CAF50;
        border-radius: 15px;
        padding: 30px;
        box-shadow: 0px 0px 20px rgba(0,255,0,0.6);
        margin-top: 40px;
        max-width: 600px;
        margin-left: auto;
        margin-right: auto;
    }
    h1, h2 {
        text-align: center;
        font-size: 32px;
        font-weight: bold;
        margin-bottom: 20px;
    }
    """

# Multi-line box the user types or pastes the text into.
_text_input = gr.Textbox(
    label="Enter Text for Analysis",
    placeholder="Type or paste your content here...",
    lines=5,
    elem_id="text_input_box",
)

# classify_text returns Markdown, so the result is rendered as Markdown.
_result_output = gr.Markdown(elem_id="result_output_box")

# The app itself: one text input wired to classify_text, one Markdown output.
iface = gr.Interface(
    fn=classify_text,
    inputs=_text_input,
    outputs=_result_output,
    title=title,
    description=description,
    allow_flagging="never",
    live=True,
    css=_CUSTOM_CSS,
)

# Start the web UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link — confirm
    # that exposing the app publicly is intended for this deployment.
    iface.launch(share=True)