File size: 6,024 Bytes
84669bc
29edf23
7feda08
6ba2176
7fc55d1
 
7feda08
59d6d08
8e09e8c
7fc55d1
 
 
6ba2176
 
 
 
 
 
 
7feda08
 
 
 
c93f011
 
 
59d6d08
 
 
 
 
41941cd
 
5065a5b
2ff4e71
 
41941cd
2ff4e71
41941cd
2ff4e71
 
c930ce3
2ff4e71
5065a5b
 
 
 
 
 
 
 
524ff09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c39506
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5065a5b
 
 
 
 
73ae45e
5065a5b
 
 
 
 
 
 
 
 
 
 
 
73ae45e
5065a5b
 
 
 
 
73ae45e
5065a5b
73ae45e
3c39506
 
5065a5b
73ae45e
5065a5b
59d6d08
 
13a208e
59d6d08
3da716d
59d6d08
41941cd
 
 
 
524ff09
 
 
 
 
3da716d
59d6d08
 
41941cd
 
 
5065a5b
 
 
 
 
 
3c39506
5065a5b
 
 
 
41941cd
5065a5b
 
041e7ca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import subprocess
import sys

import gradio as gr
import torch
import spacy
import nltk
from nltk.corpus import wordnet
from gensim import downloader as api
from gramformer import Gramformer

# Ensure necessary NLTK data is downloaded
nltk.download('wordnet')
nltk.download('omw-1.4')

# Ensure the spaCy model is installed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Install with the interpreter that is running this script: a bare
    # "python" may be missing from PATH or belong to another environment.
    # check=True surfaces a failed download here instead of as a second,
    # less informative OSError from spacy.load() below.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Load a small set of pre-trained GloVe word vectors via Gensim's downloader
# NOTE(review): word_vectors is not referenced anywhere else in the visible
# code — confirm it is still needed before paying for the download.
word_vectors = api.load("glove-wiki-gigaword-50")

# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load Gramformer for grammar correction
# NOTE(review): Gramformer's README documents models=1 as the corrector and
# models=2 as the detector. With models=2 the correction model is likely
# never loaded, so gf.correct() would return nothing and grammar correction
# becomes a no-op. Switching to models=1 requires correct_grammar() to cope
# with the set that gf.correct() returns — confirm and change both together.
gf = Gramformer(models=2, use_gpu=torch.cuda.is_available())

# Classifier and tokenizer used by detect_ai_generated()
# NOTE(review): this checkpoint is fine-tuned on SST-2 (sentiment analysis),
# so its two classes are presumably NEGATIVE/POSITIVE rather than
# human/AI-written — confirm whether a real AI-text detector was intended.
from transformers import AutoTokenizer, AutoModelForSequenceClassification
tokenizer_ai = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model_ai = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)

# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Score *text* with the module-level classifier and format the result.

    The text is tokenized (truncated to 512 tokens), run through ``model_ai``
    without gradient tracking, and the softmax probability of class index 1
    is reported as a percentage.

    NOTE(review): ``model_ai`` is loaded in this file from an SST-2 sentiment
    checkpoint, so class index 1 is presumably "positive sentiment" rather
    than "AI-generated" — confirm.

    Args:
        text: The text to score.

    Returns:
        A string of the form ``"AI-Generated Content Probability: 12.34%"``.
    """
    encoded = tokenizer_ai(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model_ai(**encoded).logits
        class_probs = torch.softmax(logits, dim=1)

    # First (only) item in the batch, class index 1.
    ai_probability = class_probs[0, 1].item()
    return f"AI-Generated Content Probability: {ai_probability * 100:.2f}%"

# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset found for *word*.

    Args:
        word: The word to look up.
        pos: WordNet part-of-speech constant (e.g. ``wordnet.NOUN``) used to
            restrict the lookup.

    Returns:
        A list of lemma names from the first matching synset, or an empty
        list when WordNet has no synset for the word/POS pair. WordNet joins
        the words of multi-word lemmas with underscores (``"take_place"``);
        these are converted to spaces so the names can be inserted into
        running text.
    """
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    return [lemma.name().replace('_', ' ') for lemma in synsets[0].lemmas()]

# Function to check and correct tenses and verbs using spaCy
def check_tense_and_correct(text):
    """Replace 3rd-person-singular present verbs with their base form.

    Every token spaCy tags as a ``VERB`` with fine-grained tag ``VBZ`` is
    replaced by its lemma (e.g. "goes" -> "go"); all other tokens, including
    verbs with any other tag, are kept unchanged.

    NOTE(review): dropping the VBZ inflection unconditionally also removes
    correct subject-verb agreement ("she goes" -> "she go") — confirm this is
    the intended behavior.

    Args:
        text: The text to process.

    Returns:
        The processed text. Each token keeps the whitespace that followed it
        in the input, so punctuation and contractions are not pushed apart by
        extra spaces.
    """
    doc = nlp(text)
    pieces = []

    for token in doc:
        if token.pos_ == 'VERB' and token.tag_ == 'VBZ':
            word = token.lemma_
        else:
            word = token.text
        # whitespace_ is the token's original trailing whitespace ('' or ' ');
        # re-attaching it reproduces the input spacing instead of forcing a
        # space between every pair of tokens.
        pieces.append(word + token.whitespace_)

    return ''.join(pieces)

# Function to capitalize the first letter of sentences and proper nouns
def capitalize_sentences_and_nouns(text):
    """Upper-case the first letter of each sentence and of each proper noun.

    Sentences and proper nouns (``PROPN``) are identified by the spaCy
    pipeline. Only the first character of a word is changed; the remaining
    characters keep their case, so acronyms and mixed-case names such as
    "NASA" or "iPhone" are not flattened to "Nasa" / "Iphone".

    Args:
        text: The text to process.

    Returns:
        The text with capitalization applied. Each token keeps the whitespace
        that followed it in the input, so no spaces are inserted before
        punctuation.
    """
    doc = nlp(text)
    pieces = []

    for sent in doc.sents:
        for token in sent:
            word = token.text
            if token.i == sent.start or token.pos_ == "PROPN":
                # str.capitalize() would also lower-case the rest of the word.
                word = word[:1].upper() + word[1:]
            # Re-attach the token's original trailing whitespace.
            pieces.append(word + token.whitespace_)

    return ''.join(pieces)

# Paraphrasing function using spaCy and NLTK
def paraphrase_with_spacy_nltk(text):
    """Paraphrase *text* by swapping content words for a WordNet synonym.

    Each noun, verb, adjective and adverb (as tagged by spaCy) is looked up
    with ``get_synonyms_nltk``. When the first synonym returned differs from
    the lower-cased word, it replaces the word; every other token is kept as
    is. The result is then passed through ``capitalize_sentences_and_nouns``.

    NOTE(review): the synonym is inserted in the form WordNet returns it, so
    the inflection of the replaced word (plural, past tense, ...) is not
    carried over.

    Args:
        text: The text to paraphrase.

    Returns:
        The paraphrased, re-capitalized text. Each token keeps the whitespace
        that followed it in the input, so punctuation is not detached from
        the preceding word.
    """
    # Map spaCy coarse POS tags to WordNet POS constants; any tag missing
    # from this table is never replaced.
    wordnet_pos_by_spacy_pos = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    doc = nlp(text)
    pieces = []

    for token in doc:
        lowered = token.text.lower()
        pos = wordnet_pos_by_spacy_pos.get(token.pos_)
        synonyms = get_synonyms_nltk(lowered, pos) if pos else []

        # Replace only when the first synonym is a genuinely different word
        if synonyms and synonyms[0] != lowered:
            word = synonyms[0]
        else:
            word = token.text

        # Re-attach the token's original trailing whitespace.
        pieces.append(word + token.whitespace_)

    paraphrased_sentence = ''.join(pieces)

    # Capitalize sentences and proper nouns
    return capitalize_sentences_and_nouns(paraphrased_sentence)

# Function to correct grammar using Gramformer
def correct_grammar(text):
    """Return a grammar-corrected version of *text* using Gramformer.

    Args:
        text: The text to correct.

    Returns:
        One corrected candidate produced by ``gf.correct``, or *text*
        unchanged when the input is blank or Gramformer yields no candidate
        (for example because its correction model is not loaded).
    """
    # Nothing to correct; skip the model call.
    if not text or not text.strip():
        return text

    candidates = gf.correct(text)
    if not candidates:
        return text

    # gf.correct() returns a set in current Gramformer releases, which cannot
    # be indexed with [0]; next(iter(...)) works for sets and lists alike.
    best = next(iter(candidates))

    # Some Gramformer releases return ranked (sentence, score) pairs.
    if isinstance(best, tuple):
        best = best[0]

    return best

# Combined function: Paraphrase -> Tense Check -> Capitalization -> Grammar Correction
def paraphrase_and_correct(text):
    """Run *text* through the full rewriting pipeline and return the result.

    The stages are applied in order, each receiving the previous stage's
    output: synonym paraphrasing, tense/verb adjustment, capitalization of
    sentences and proper nouns, and finally Gramformer grammar correction.

    Args:
        text: The text to rewrite.

    Returns:
        The text produced by the last stage.
    """
    pipeline = (
        paraphrase_with_spacy_nltk,      # Step 1: paraphrase
        check_tense_and_correct,         # Step 2: tense / verb check
        capitalize_sentences_and_nouns,  # Step 3: capitalization
        correct_grammar,                 # Step 4: grammar correction
    )

    result = text
    for stage in pipeline:
        result = stage(result)
    return result

# Gradio interface definition: one row with two columns. The first column
# holds the input textbox and the two action buttons; the second column holds
# the single output textbox that both actions write to.
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase & Correct")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Both buttons read text_input and overwrite output_text with the string
    # returned by their handler.
    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)

# Launch the Gradio app (runs at import time; there is no __main__ guard)
interface.launch(debug=False)