taha092 committed on
Commit
78bbe6d
·
verified ·
1 Parent(s): b9a2183

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +220 -180
app.py CHANGED
@@ -1,181 +1,221 @@
1
- import gradio as gr
2
- import torch
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
- from sentence_transformers import SentenceTransformer, util
5
- import numpy as np
6
- import requests
7
- import gradio.themes as grthemes
8
- import config
9
-
10
- # ----------------------
11
- # Paraphrasing Model Setup
12
- # ----------------------
13
- PARAPHRASE_MODEL_NAME = "Vamsi/T5_Paraphrase_Paws"
14
- paraphrase_tokenizer = AutoTokenizer.from_pretrained(PARAPHRASE_MODEL_NAME)
15
- paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASE_MODEL_NAME)
16
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
- paraphrase_model = paraphrase_model.to(device)
18
-
19
- # ----------------------
20
- # Semantic Similarity Model
21
- # ----------------------
22
- similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
23
-
24
- # ----------------------
25
- # Tone Templates
26
- # ----------------------
27
- tone_templates = {
28
- "Academic": "Paraphrase the following text in a formal, academic tone:",
29
- "Casual": "Paraphrase the following text in a casual, conversational tone:",
30
- "Friendly": "Paraphrase the following text in a friendly, approachable tone:",
31
- "Stealth": "Paraphrase the following text to bypass AI detectors and sound as human as possible:",
32
- }
33
-
34
- # ----------------------
35
- # Paraphrasing Function
36
- # ----------------------
37
- def paraphrase(text, tone):
38
- prompt = tone_templates[tone] + " " + text
39
- input_ids = paraphrase_tokenizer.encode(prompt, return_tensors="pt", max_length=256, truncation=True).to(device)
40
- outputs = paraphrase_model.generate(
41
- input_ids,
42
- do_sample=True,
43
- top_k=120,
44
- top_p=0.95,
45
- temperature=0.7,
46
- repetition_penalty=1.2,
47
- max_length=256,
48
- num_return_sequences=1
49
- )
50
- paraphrased = paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
51
- return paraphrased
52
-
53
- # ----------------------
54
- # Semantic Similarity Function
55
- # ----------------------
56
- def semantic_similarity(text1, text2):
57
- emb1 = similarity_model.encode(text1, convert_to_tensor=True)
58
- emb2 = similarity_model.encode(text2, convert_to_tensor=True)
59
- sim = util.pytorch_cos_sim(emb1, emb2).item()
60
- return sim
61
-
62
- # ----------------------
63
- # Real AI Detection (Winston AI API)
64
- # ----------------------
65
- def check_ai_score(text):
66
- api_key = config.WINSTON_AI_API_KEY
67
- api_url = config.WINSTON_AI_API_URL
68
- if not api_key:
69
- return None, "No API key set. Please add your Winston AI API key to config.py."
70
- headers = {
71
- "Authorization": f"Bearer {api_key}",
72
- "Content-Type": "application/json"
73
- }
74
- data = {"text": text, "sentences": False}
75
- try:
76
- response = requests.post(api_url, headers=headers, json=data, timeout=30)
77
- if response.status_code == 200:
78
- result = response.json()
79
- # Winston AI returns a 'score' (0-100, higher = more human)
80
- score = result.get("score", None)
81
- if score is not None:
82
- ai_prob = 1.0 - (score / 100.0)
83
- return ai_prob, None
84
- else:
85
- return None, "No score in Winston AI response."
86
- else:
87
- return None, f"Winston AI error: {response.status_code} {response.text}"
88
- except Exception as e:
89
- return None, f"Winston AI exception: {str(e)}"
90
-
91
- # ----------------------
92
- # Humanization Score & Rating
93
- # ----------------------
94
- def humanization_score(sim, ai_prob):
95
- # Lower similarity and lower AI probability = more human
96
- score = (1.0 - sim) * 0.5 + (1.0 - ai_prob) * 0.5
97
- return score
98
-
99
- def humanization_rating(score):
100
- if score < 0.7:
101
- return f"⚠️ Still AI-like ({score:.2f})"
102
- elif score < 0.85:
103
- return f"👍 Acceptable ({score:.2f})"
104
- else:
105
- return f"✅ Highly Humanized ({score:.2f})"
106
-
107
- # ----------------------
108
- # Main Processing Function
109
- # ----------------------
110
- def process(text, tone):
111
- if not text.strip():
112
- return "", "", 0.0, "", 0.0, ""
113
- # Pre-humanization AI detection
114
- pre_ai_prob, pre_err = check_ai_score(text)
115
- if pre_ai_prob is None:
116
- return "", f"AI Detection Error: {pre_err}", 0.0, "", 0.0, ""
117
- # Paraphrase
118
- try:
119
- paraphrased = paraphrase(text, tone)
120
- except Exception as e:
121
- return f"[Paraphrasing error: {str(e)}]", "", 0.0, "", 0.0, ""
122
- # Post-humanization AI detection
123
- post_ai_prob, post_err = check_ai_score(paraphrased)
124
- if post_ai_prob is None:
125
- return paraphrased, f"AI Detection Error: {post_err}", 0.0, "", 0.0, ""
126
- # Semantic similarity
127
- sim = semantic_similarity(text, paraphrased)
128
- # Humanization score
129
- score = humanization_score(sim, post_ai_prob)
130
- rating = humanization_rating(score)
131
- ai_score_str = f"Pre: {100*(1-pre_ai_prob):.1f}% human | Post: {100*(1-post_ai_prob):.1f}% human"
132
- return (
133
- paraphrased, # gr.Textbox (string)
134
- ai_score_str, # gr.Markdown (string)
135
- sim, # gr.Number (float)
136
- rating, # gr.Markdown (string)
137
- score * 100, # gr.Number (float)
138
- ""
139
- )
140
-
141
- # ----------------------
142
- # Gradio UI
143
- # ----------------------
144
- custom_theme = grthemes.Base(
145
- primary_hue="blue",
146
- secondary_hue="blue",
147
- neutral_hue="slate"
148
- )
149
-
150
- with gr.Blocks(theme=custom_theme, title="AI Humanizer - Made by Taha") as demo:
151
- gr.Markdown("""
152
- # 🧠 AI Humanizer
153
- <div style='display:flex;justify-content:space-between;align-items:center;'>
154
- <span style='font-size:1.2em;color:#7bb1ff;'>Rewrite AI text to sound 100% human</span>
155
- <span style='font-weight:bold;color:#7bb1ff;'>Made by Taha</span>
156
- </div>
157
- """, elem_id="header")
158
- with gr.Row():
159
- with gr.Column():
160
- text_in = gr.Textbox(label="Paste AI-generated text here", lines=8, placeholder="Paste your text...", elem_id="input-box")
161
- tone = gr.Dropdown(["Academic", "Casual", "Friendly", "Stealth"], value="Stealth", label="Tone Selector")
162
- btn = gr.Button("Humanize", elem_id="humanize-btn")
163
- with gr.Column():
164
- text_out = gr.Textbox(label="Humanized Output", lines=8, interactive=False, elem_id="output-box")
165
- ai_scores = gr.Markdown("", elem_id="ai-scores")
166
- sim_score = gr.Number(label="Similarity (0=very different, 1=very similar)", interactive=False)
167
- rating = gr.Markdown("", elem_id="rating")
168
- human_score = gr.Number(label="Humanization Score (%)", interactive=False)
169
- btn.click(
170
- process,
171
- inputs=[text_in, tone],
172
- outputs=[text_out, ai_scores, sim_score, rating, human_score, gr.Textbox(visible=False)],
173
- api_name="humanize"
174
- )
175
- gr.Markdown("""
176
- <div style='text-align:center;color:#7bb1ff;margin-top:2em;'>
177
- <b>Made by Taha</b> | Free for unlimited use | Optimized for students and creators
178
- </div>
179
- """, elem_id="footer")
180
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  demo.launch()
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+ from sentence_transformers import SentenceTransformer, util
5
+ import numpy as np
6
+ import requests
7
+ import gradio.themes as grthemes
8
+ import config
9
+ import random
10
+ import re
11
+
12
+ # ----------------------
13
+ # Paraphrasing Model Setup (Pegasus)
14
+ # ----------------------
15
# Checkpoint loaded for both the tokenizer and the seq2seq paraphrasing model.
PARAPHRASE_MODEL_NAME = "tuner007/pegasus_paraphrase"

# Use CUDA when torch reports it as available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

paraphrase_tokenizer = AutoTokenizer.from_pretrained(PARAPHRASE_MODEL_NAME)
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASE_MODEL_NAME).to(device)

# ----------------------
# Semantic Similarity Model
# ----------------------
# Sentence-embedding model used to compare the input with its paraphrase.
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')

# ----------------------
# Prompt Variations for Humanization
# ----------------------
# One of these instruction prefixes is picked at random for every sentence.
PROMPT_VARIANTS = [
    "Paraphrase this naturally:",
    "Rewrite as if explaining to a friend:",
    "Make this sound like a real conversation:",
    "Express this in a casual, human way:",
    "Reword this with natural flow:",
    "Make this sound less robotic:",
    "Rewrite in a friendly, informal tone:",
    "Paraphrase in a way a student would say it:",
]
39
+
40
+ # ----------------------
41
+ # Sentence Splitter
42
+ # ----------------------
43
def split_sentences(text):
    """Split *text* into sentences.

    A boundary is a run of whitespace that follows ``.``, ``!`` or ``?``,
    optionally followed by a single closing quote or bracket, so that
    ``He said "Stop." Then he left.`` yields two sentences. The punctuation
    stays attached to the sentence it terminates.

    This is a lightweight heuristic: abbreviations such as ``Dr. Smith`` are
    still treated as sentence ends.

    Args:
        text: Input string. ``None`` and blank strings are accepted.

    Returns:
        A list of non-empty sentence strings in their original order.
    """
    if not text or not text.strip():
        return []
    # Look-behinds must be fixed width, so "punctuation" and
    # "punctuation + closing quote/bracket" are two separate alternatives.
    boundary = r'(?:(?<=[.!?])|(?<=[.!?]["\'”’)\]]))\s+'
    return [piece for piece in re.split(boundary, text.strip()) if piece]
47
+
48
+ # ----------------------
49
+ # Light Post-Processing
50
+ # ----------------------
51
# Long form -> contraction. Applied in this order, so "it is not" becomes
# "it isn't" ("is not" is handled before "it is").
_CONTRACTIONS = {
    "do not": "don't", "cannot": "can't", "will not": "won't", "I am": "I'm",
    "is not": "isn't", "are not": "aren't", "did not": "didn't", "it is": "it's",
    "does not": "doesn't", "have not": "haven't", "has not": "hasn't",
}

# Compiled once at import time instead of on every call.
_CONTRACTION_PATTERNS = [
    (re.compile(rf'\b{re.escape(phrase)}\b', re.IGNORECASE), short)
    for phrase, short in _CONTRACTIONS.items()
]

_IDIOMS = [
    "at the end of the day", "to be honest", "as a matter of fact", "for what it's worth",
    "in a nutshell", "the bottom line is", "all things considered",
]


def _match_case(found, short):
    """Return *short*, capitalised when the matched phrase *found* was capitalised."""
    if found[:1].isupper() and short[:1].islower():
        return short[:1].upper() + short[1:]
    return short


def postprocess_text(text, *, idiom_probability=0.3):
    """Make *text* read less formally.

    Two steps are applied:

    1. Common long forms ("do not", "it is", ...) are replaced by their
       contractions. Matching is case-insensitive and the capitalisation of
       the matched phrase is kept, so "Do not" becomes "Don't" rather than
       "don't".
    2. With probability *idiom_probability* a random idiom followed by a
       full stop is appended. It is capitalised when it follows
       sentence-ending punctuation.

    Args:
        text: Text to post-process. ``None`` or blank text is returned
            unchanged, so an idiom is never appended to empty output.
        idiom_probability: Chance in ``[0, 1]`` of appending an idiom.
            ``0`` disables the step.

    Returns:
        The post-processed string.
    """
    if not text or not text.strip():
        return text

    for pattern, short in _CONTRACTION_PATTERNS:
        # Bind ``short`` as a default so each lambda keeps its own value.
        text = pattern.sub(lambda m, short=short: _match_case(m.group(0), short), text)

    if random.random() < idiom_probability:
        idiom = random.choice(_IDIOMS)
        if text.rstrip().endswith(('.', '!', '?')):
            # The idiom starts a new fragment after a finished sentence.
            idiom = idiom[0].upper() + idiom[1:]
        text += " " + idiom + "."
    return text
68
+
69
+ # ----------------------
70
+ # Sentence-level Paraphrasing with Prompt Variation
71
+ # ----------------------
72
def paraphrase_sentence(sentence, tone):
    """Paraphrase a single sentence with the seq2seq paraphrasing model.

    A random entry of ``PROMPT_VARIANTS`` is used as an instruction prefix.
    For any tone other than ``"Stealth"`` a ``(<tone> tone)`` hint is added
    to the prefix. Generation uses 5-beam search without sampling, so for a
    given prefix the output does not vary.

    Args:
        sentence: The sentence to rewrite. Blank or ``None`` input is
            returned unchanged without calling the model.
        tone: Tone name selected in the UI (e.g. ``"Casual"``, ``"Stealth"``).

    Returns:
        The first decoded candidate, or *sentence* if nothing was decoded.
    """
    if not sentence or not sentence.strip():
        return sentence

    prompt = random.choice(PROMPT_VARIANTS)
    if tone != "Stealth":
        # Every variant already ends with ":"; strip it so the hint does not
        # produce a doubled "...: (Casual tone):".
        prompt = f"{prompt.rstrip(':')} ({tone} tone):"
    full_prompt = f"{prompt} {sentence}"

    # NOTE(review): the prefix shares the 60-token budget with the sentence,
    # so long sentences are truncated earlier than without it. It also looks
    # like this checkpoint is a plain paraphraser rather than an
    # instruction-following model; confirm the prefix is not echoed back.
    batch = paraphrase_tokenizer(
        [full_prompt],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    ).to(device)
    outputs = paraphrase_model.generate(
        **batch,
        max_length=60,
        num_beams=5,
        num_return_sequences=1,
    )
    decoded = paraphrase_tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return decoded[0] if decoded else sentence
87
+
88
+ # ----------------------
89
+ # Main Paraphrasing Function
90
+ # ----------------------
91
def paraphrase(text, tone):
    """Rewrite *text* one sentence at a time, then post-process the result.

    The text is cut into sentences with ``split_sentences``, each sentence is
    passed through ``paraphrase_sentence`` with the requested *tone*, the
    rewrites are joined with single spaces, and the joined text is run
    through ``postprocess_text``.

    Args:
        text: The full input text.
        tone: Tone name forwarded to ``paraphrase_sentence``.

    Returns:
        The rewritten, post-processed text.
    """
    rewritten_parts = [
        paraphrase_sentence(piece, tone) for piece in split_sentences(text)
    ]
    return postprocess_text(' '.join(rewritten_parts))
99
+
100
+ # ----------------------
101
+ # Semantic Similarity Function
102
+ # ----------------------
103
def semantic_similarity(text1, text2):
    """Return the cosine similarity of the embeddings of two texts.

    Both texts are encoded separately with ``similarity_model`` and compared
    with ``util.pytorch_cos_sim``.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The similarity as a plain Python float.
    """
    first_vec, second_vec = (
        similarity_model.encode(passage, convert_to_tensor=True)
        for passage in (text1, text2)
    )
    return util.pytorch_cos_sim(first_vec, second_vec).item()
108
+
109
+ # ----------------------
110
+ # Real AI Detection (Winston AI API)
111
+ # ----------------------
112
def check_ai_score(text):
    """Query the Winston AI API and convert its score to an AI probability.

    The response's ``score`` field is treated as a 0-100 "human" percentage,
    and ``1 - score / 100`` is returned as the probability that the text is
    AI-generated, clamped to ``[0, 1]``.

    This function does not raise for API problems: every failure is reported
    through the second element of the returned tuple.

    Args:
        text: The text to analyse.

    Returns:
        ``(ai_prob, None)`` on success, or ``(None, error_message)`` when the
        key is missing, the request fails, the status is not 200, or the
        response carries no usable score.
    """
    api_key = config.WINSTON_AI_API_KEY
    api_url = config.WINSTON_AI_API_URL
    if not api_key:
        return None, "No API key set. Please add your Winston AI API key to config.py."

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {"text": text, "sentences": False}

    # Only the network call sits in this try, so programming errors elsewhere
    # are not silently reported as API failures.
    try:
        response = requests.post(api_url, headers=headers, json=data, timeout=30)
    except requests.RequestException as e:
        return None, f"Winston AI exception: {str(e)}"

    if response.status_code != 200:
        return None, f"Winston AI error: {response.status_code} {response.text}"

    try:
        result = response.json()
    except ValueError as e:  # body was not valid JSON
        return None, f"Winston AI exception: {str(e)}"

    score = result.get("score") if isinstance(result, dict) else None
    if score is None:
        return None, "No score in Winston AI response."
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(score, bool) or not isinstance(score, (int, float)):
        return None, f"Winston AI exception: unexpected score value {score!r}"

    ai_prob = 1.0 - (score / 100.0)
    # Keep the probability in range even if the API reports an out-of-range score.
    return min(1.0, max(0.0, ai_prob)), None
136
+
137
+ # ----------------------
138
+ # Humanization Score & Rating
139
+ # ----------------------
140
def humanization_score(sim, ai_prob):
    """Combine similarity and AI probability into a score in ``[0, 1]``.

    The score is the equally weighted mean of ``1 - sim`` and ``1 - ai_prob``,
    so it rises as the paraphrase differs more from the source and as the
    detector's AI probability falls.

    Both inputs are clamped to ``[0, 1]`` first. Cosine similarity can be
    negative or marginally above 1 through floating-point error, which would
    otherwise push the percentage shown in the UI outside 0-100.

    Args:
        sim: Similarity between the original and the paraphrase.
        ai_prob: Probability that the paraphrase is AI-generated.

    Returns:
        A float in ``[0, 1]``.
    """
    sim = min(1.0, max(0.0, sim))
    ai_prob = min(1.0, max(0.0, ai_prob))
    return (1.0 - sim) * 0.5 + (1.0 - ai_prob) * 0.5
143
+
144
def humanization_rating(score):
    """Return a label for a humanization *score*.

    Thresholds: below 0.7 is "Still AI-like", below 0.85 is "Acceptable",
    anything else is "Highly Humanized". Each label starts with a marker
    symbol and ends with the score formatted to two decimals.

    Args:
        score: Humanization score as produced by ``humanization_score``.

    Returns:
        The label string shown in the rating panel.
    """
    if score < 0.7:
        return f"⚠️ Still AI-like ({score:.2f})"
    if score < 0.85:
        return f"👍 Acceptable ({score:.2f})"
    # The check mark was missing here, leaving a stray leading space.
    return f"✅ Highly Humanized ({score:.2f})"
151
+
152
+ # ----------------------
153
+ # Main Processing Function
154
+ # ----------------------
155
def process(text, tone):
    """Run the full humanization pipeline for the Gradio click handler.

    Steps: score the input with ``check_ai_score``, paraphrase it, score the
    paraphrase, measure semantic similarity, then derive the humanization
    score and rating.

    Args:
        text: Text from the input box. ``None`` or blank text short-circuits
            to an all-empty result without calling any model or API.
        tone: Tone name chosen in the dropdown.

    Returns:
        A 6-tuple ``(paraphrased, ai_score_text, similarity, rating_text,
        score_percent, "")``. The last element is always an empty string.
        When a step fails, the error message goes in the first or second slot
        and the remaining slots keep their empty defaults.
    """
    def _outputs(paraphrased="", ai_scores="", sim=0.0, rating="", score_pct=0.0):
        # Single place that fixes the shape and order of the returned tuple.
        return paraphrased, ai_scores, sim, rating, score_pct, ""

    # ``text`` can be None when the endpoint is called without a value.
    if not text or not text.strip():
        return _outputs()

    pre_ai_prob, pre_err = check_ai_score(text)
    if pre_ai_prob is None:
        return _outputs(ai_scores=f"AI Detection Error: {pre_err}")

    try:
        paraphrased = paraphrase(text, tone)
    except Exception as e:
        # UI boundary: show any model failure in the output box rather than
        # raising out of the handler.
        return _outputs(paraphrased=f"[Paraphrasing error: {str(e)}]")

    post_ai_prob, post_err = check_ai_score(paraphrased)
    if post_ai_prob is None:
        return _outputs(paraphrased=paraphrased, ai_scores=f"AI Detection Error: {post_err}")

    sim = semantic_similarity(text, paraphrased)
    score = humanization_score(sim, post_ai_prob)
    rating = humanization_rating(score)
    ai_score_str = f"Pre: {100*(1-pre_ai_prob):.1f}% human | Post: {100*(1-post_ai_prob):.1f}% human"
    return _outputs(
        paraphrased=paraphrased,
        ai_scores=ai_score_str,
        sim=sim,
        rating=rating,
        score_pct=score * 100,
    )
180
+
181
+ # ----------------------
182
+ # Gradio UI
183
+ # ----------------------
184
# Blue/slate colour scheme shared by the whole page.
custom_theme = grthemes.Base(primary_hue="blue", secondary_hue="blue", neutral_hue="slate")

with gr.Blocks(theme=custom_theme, title="AI Humanizer - Made by Taha") as demo:
    # Page header: title plus tagline/credit line.
    gr.Markdown("""
    # 🧠 AI Humanizer
    <div style='display:flex;justify-content:space-between;align-items:center;'>
    <span style='font-size:1.2em;color:#7bb1ff;'>Rewrite AI text to sound 100% human</span>
    <span style='font-weight:bold;color:#7bb1ff;'>Made by Taha</span>
    </div>
    """, elem_id="header")

    with gr.Row():
        # Left column: user input and controls.
        with gr.Column():
            text_in = gr.Textbox(
                label="Paste AI-generated text here",
                lines=8,
                placeholder="Paste your text...",
                elem_id="input-box",
            )
            tone = gr.Dropdown(
                ["Academic", "Casual", "Friendly", "Stealth"],
                value="Stealth",
                label="Tone Selector",
            )
            btn = gr.Button("Humanize", elem_id="humanize-btn")
        # Right column: rewritten text and its metrics.
        with gr.Column():
            text_out = gr.Textbox(
                label="Humanized Output",
                lines=8,
                interactive=False,
                elem_id="output-box",
            )
            ai_scores = gr.Markdown("", elem_id="ai-scores")
            sim_score = gr.Number(
                label="Similarity (0=very different, 1=very similar)",
                interactive=False,
            )
            rating = gr.Markdown("", elem_id="rating")
            human_score = gr.Number(label="Humanization Score (%)", interactive=False)

    # ``process`` returns six values; the sixth goes to a hidden textbox.
    hidden_out = gr.Textbox(visible=False)
    btn.click(
        process,
        inputs=[text_in, tone],
        outputs=[text_out, ai_scores, sim_score, rating, human_score, hidden_out],
        api_name="humanize",
    )

    # Page footer.
    gr.Markdown("""
    <div style='text-align:center;color:#7bb1ff;margin-top:2em;'>
    <b>Made by Taha</b> | Free for unlimited use | Optimized for students and creators
    </div>
    """, elem_id="footer")

demo.launch()