Spaces:

taha092
/

HumanizerV2

Runtime error

App Files Files Community

taha092 committed 24 days ago

Commit

78bbe6d

verified ·

1 Parent(s): b9a2183

Update app.py

Browse files

Files changed (1) hide show

app.py +220 -180

app.py CHANGED Viewed

@@ -1,181 +1,221 @@
-import gradio as gr
-import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-from sentence_transformers import SentenceTransformer, util
-import numpy as np
-import requests
-import gradio.themes as grthemes
-import config
-# ----------------------
-# Paraphrasing Model Setup
-# ----------------------
-PARAPHRASE_MODEL_NAME = "Vamsi/T5_Paraphrase_Paws"
-paraphrase_tokenizer = AutoTokenizer.from_pretrained(PARAPHRASE_MODEL_NAME)
-paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASE_MODEL_NAME)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-paraphrase_model = paraphrase_model.to(device)
-# ----------------------
-# Semantic Similarity Model
-# ----------------------
-similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
-# ----------------------
-# Tone Templates
-# ----------------------
-tone_templates = {
-    "Academic": "Paraphrase the following text in a formal, academic tone:",
-    "Casual": "Paraphrase the following text in a casual, conversational tone:",
-    "Friendly": "Paraphrase the following text in a friendly, approachable tone:",
-    "Stealth": "Paraphrase the following text to bypass AI detectors and sound as human as possible:",
-}
-# ----------------------
-# Paraphrasing Function
-# ----------------------
-def paraphrase(text, tone):
-    prompt = tone_templates[tone] + " " + text
-    input_ids = paraphrase_tokenizer.encode(prompt, return_tensors="pt", max_length=256, truncation=True).to(device)
-    outputs = paraphrase_model.generate(
-        input_ids,
-        do_sample=True,
-        top_k=120,
-        top_p=0.95,
-        temperature=0.7,
-        repetition_penalty=1.2,
-        max_length=256,
-        num_return_sequences=1
-    )
-    paraphrased = paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return paraphrased
-# ----------------------
-# Semantic Similarity Function
-# ----------------------
-def semantic_similarity(text1, text2):
-    emb1 = similarity_model.encode(text1, convert_to_tensor=True)
-    emb2 = similarity_model.encode(text2, convert_to_tensor=True)
-    sim = util.pytorch_cos_sim(emb1, emb2).item()
-    return sim
-# ----------------------
-# Real AI Detection (Winston AI API)
-# ----------------------
-def check_ai_score(text):
-    api_key = config.WINSTON_AI_API_KEY
-    api_url = config.WINSTON_AI_API_URL
-    if not api_key:
-        return None, "No API key set. Please add your Winston AI API key to config.py."
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json"
-    }
-    data = {"text": text, "sentences": False}
-    try:
-        response = requests.post(api_url, headers=headers, json=data, timeout=30)
-        if response.status_code == 200:
-            result = response.json()
-            # Winston AI returns a 'score' (0-100, higher = more human)
-            score = result.get("score", None)
-            if score is not None:
-                ai_prob = 1.0 - (score / 100.0)
-                return ai_prob, None
-            else:
-                return None, "No score in Winston AI response."
-        else:
-            return None, f"Winston AI error: {response.status_code} {response.text}"
-    except Exception as e:
-        return None, f"Winston AI exception: {str(e)}"
-# ----------------------
-# Humanization Score & Rating
-# ----------------------
-def humanization_score(sim, ai_prob):
-    # Lower similarity and lower AI probability = more human
-    score = (1.0 - sim) * 0.5 + (1.0 - ai_prob) * 0.5
-    return score
-def humanization_rating(score):
-    if score < 0.7:
-        return f"⚠️ Still AI-like ({score:.2f})"
-    elif score < 0.85:
-        return f"👍 Acceptable ({score:.2f})"
-    else:
-        return f"✅ Highly Humanized ({score:.2f})"
-# ----------------------
-# Main Processing Function
-# ----------------------
-def process(text, tone):
-    if not text.strip():
-        return "", "", 0.0, "", 0.0, ""
-    # Pre-humanization AI detection
-    pre_ai_prob, pre_err = check_ai_score(text)
-    if pre_ai_prob is None:
-        return "", f"AI Detection Error: {pre_err}", 0.0, "", 0.0, ""
-    # Paraphrase
-    try:
-        paraphrased = paraphrase(text, tone)
-    except Exception as e:
-        return f"[Paraphrasing error: {str(e)}]", "", 0.0, "", 0.0, ""
-    # Post-humanization AI detection
-    post_ai_prob, post_err = check_ai_score(paraphrased)
-    if post_ai_prob is None:
-        return paraphrased, f"AI Detection Error: {post_err}", 0.0, "", 0.0, ""
-    # Semantic similarity
-    sim = semantic_similarity(text, paraphrased)
-    # Humanization score
-    score = humanization_score(sim, post_ai_prob)
-    rating = humanization_rating(score)
-    ai_score_str = f"Pre: {100*(1-pre_ai_prob):.1f}% human | Post: {100*(1-post_ai_prob):.1f}% human"
-    return (
-        paraphrased,         # gr.Textbox (string)
-        ai_score_str,        # gr.Markdown (string)
-        sim,                # gr.Number (float)
-        rating,             # gr.Markdown (string)
-        score * 100,        # gr.Number (float)
-        ""
-    )
-# ----------------------
-# Gradio UI
-# ----------------------
-custom_theme = grthemes.Base(
-    primary_hue="blue",
-    secondary_hue="blue",
-    neutral_hue="slate"
-)
-with gr.Blocks(theme=custom_theme, title="AI Humanizer - Made by Taha") as demo:
-    gr.Markdown("""
-    # 🧠 AI Humanizer
-    <div style='display:flex;justify-content:space-between;align-items:center;'>
-        <span style='font-size:1.2em;color:#7bb1ff;'>Rewrite AI text to sound 100% human</span>
-        <span style='font-weight:bold;color:#7bb1ff;'>Made by Taha</span>
-    </div>
-    """, elem_id="header")
-    with gr.Row():
-        with gr.Column():
-            text_in = gr.Textbox(label="Paste AI-generated text here", lines=8, placeholder="Paste your text...", elem_id="input-box")
-            tone = gr.Dropdown(["Academic", "Casual", "Friendly", "Stealth"], value="Stealth", label="Tone Selector")
-            btn = gr.Button("Humanize", elem_id="humanize-btn")
-        with gr.Column():
-            text_out = gr.Textbox(label="Humanized Output", lines=8, interactive=False, elem_id="output-box")
-            ai_scores = gr.Markdown("", elem_id="ai-scores")
-            sim_score = gr.Number(label="Similarity (0=very different, 1=very similar)", interactive=False)
-            rating = gr.Markdown("", elem_id="rating")
-            human_score = gr.Number(label="Humanization Score (%)", interactive=False)
-    btn.click(
-        process,
-        inputs=[text_in, tone],
-        outputs=[text_out, ai_scores, sim_score, rating, human_score, gr.Textbox(visible=False)],
-        api_name="humanize"
-    )
-    gr.Markdown("""
-    <div style='text-align:center;color:#7bb1ff;margin-top:2em;'>
-        <b>Made by Taha</b> | Free for unlimited use | Optimized for students and creators
-    </div>
-    """, elem_id="footer")
 demo.launch()

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from sentence_transformers import SentenceTransformer, util
+import numpy as np
+import requests
+import gradio.themes as grthemes
+import config
+import random
+import re
+# ----------------------
+# Paraphrasing Model Setup (Pegasus)
+# ----------------------
+PARAPHRASE_MODEL_NAME = "tuner007/pegasus_paraphrase"
+# Prefer the GPU when torch can see one; otherwise run on the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+paraphrase_tokenizer = AutoTokenizer.from_pretrained(PARAPHRASE_MODEL_NAME)
+# Load the seq2seq weights and move them to the selected device in one step.
+paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASE_MODEL_NAME).to(device)
+# ----------------------
+# Semantic Similarity Model
+# ----------------------
+similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
+# ----------------------
+# Prompt Variations for Humanization
+# ----------------------
+# Module-level pool of instruction-style prefixes for paraphrase prompts.
+# Every entry ends with a colon so the text to rewrite can follow it directly.
+PROMPT_VARIANTS = [
+    "Paraphrase this naturally:",
+    "Rewrite as if explaining to a friend:",
+    "Make this sound like a real conversation:",
+    "Express this in a casual, human way:",
+    "Reword this with natural flow:",
+    "Make this sound less robotic:",
+    "Rewrite in a friendly, informal tone:",
+    "Paraphrase in a way a student would say it:",
+]
+# ----------------------
+# Sentence Splitter
+# ----------------------
+def split_sentences(text):
+    """Split *text* into sentences at whitespace that follows '.', '!' or '?'.
+
+    Leading/trailing whitespace is stripped first and empty fragments are
+    dropped, so blank input yields an empty list. This is a heuristic: any
+    period followed by whitespace counts as a boundary, abbreviations included.
+    """
+    boundary = re.compile(r'(?<=[.!?])\s+')
+    pieces = boundary.split(text.strip())
+    return list(filter(None, pieces))
+# ----------------------
+# Light Post-Processing
+# ----------------------
+def postprocess_text(text):
+    """Apply light "humanizing" touches to paraphrased text.
+
+    Two things happen, in order:
+
+    1. Common long forms ("do not", "it is", ...) are replaced by their
+       contractions, matched case-insensitively on word boundaries. A match
+       that starts with a capital letter yields a capitalised contraction, so
+       sentence-initial casing survives ("Do not" -> "Don't").
+    2. With probability 0.3 a stock idiom followed by a period is appended
+       after a single space.
+
+    Args:
+        text: The text to post-process.
+
+    Returns:
+        The modified string. The result is non-deterministic because of the
+        random idiom; seed ``random`` for reproducible output.
+    """
+    contractions = {
+        "do not": "don't", "cannot": "can't", "will not": "won't", "I am": "I'm",
+        "is not": "isn't", "are not": "aren't", "did not": "didn't", "it is": "it's",
+        "does not": "doesn't", "have not": "haven't", "has not": "hasn't"
+    }
+
+    def _contract(match, short):
+        # re.IGNORECASE also matches "Do not" / "It is"; inserting the
+        # lower-case contraction verbatim would de-capitalise sentence starts.
+        if match.group(0)[0].isupper():
+            return short[0].upper() + short[1:]
+        return short
+
+    for long_form, short in contractions.items():
+        text = re.sub(
+            rf'\b{long_form}\b',
+            # Bind `short` as a default so each callback keeps its own value.
+            lambda m, short=short: _contract(m, short),
+            text,
+            flags=re.IGNORECASE,
+        )
+    # Add a simple idiom randomly
+    idioms = [
+        "at the end of the day", "to be honest", "as a matter of fact", "for what it's worth",
+        "in a nutshell", "the bottom line is", "all things considered"
+    ]
+    if random.random() < 0.3:
+        text += " " + random.choice(idioms) + "."
+    return text
+# ----------------------
+# Sentence-level Paraphrasing with Prompt Variation
+# ----------------------
+def paraphrase_sentence(sentence, tone):
+    """Paraphrase a single sentence with the Pegasus paraphrase model.
+
+    Args:
+        sentence: One sentence of source text.
+        tone: Tone selected in the UI. Accepted so existing callers keep
+            working, but not fed to the model (see the note in the body).
+
+    Returns:
+        The model's top beam-search candidate, or ``sentence`` unchanged when
+        it is blank or the model returns nothing.
+    """
+    if not sentence or not sentence.strip():
+        # Nothing to rewrite; skip tokenisation and the forward pass.
+        return sentence
+    # The sentence is passed on its own. This checkpoint is a plain
+    # sequence-to-sequence paraphraser rather than an instruction-following
+    # model: an instruction prefix such as "Paraphrase this naturally:" is
+    # treated as content, gets paraphrased into the output, and uses up part of
+    # the 60-token input budget.
+    batch = paraphrase_tokenizer([sentence], truncation=True, padding='longest', max_length=60, return_tensors="pt").to(device)
+    # No `temperature` argument: it only affects sampling, and this call uses
+    # deterministic beam search (do_sample is left at its default).
+    outputs = paraphrase_model.generate(
+        **batch,
+        max_length=60,
+        num_beams=5,
+        num_return_sequences=1,
+    )
+    tgt_text = paraphrase_tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    return tgt_text[0] if tgt_text else sentence
+# ----------------------
+# Main Paraphrasing Function
+# ----------------------
+def paraphrase(text, tone):
+    """Paraphrase *text* one sentence at a time and post-process the result.
+
+    The text is split with ``split_sentences``, each sentence is rewritten by
+    ``paraphrase_sentence`` using *tone*, the rewrites are joined with single
+    spaces, and the joined text goes through ``postprocess_text``.
+    """
+    rewritten_parts = [
+        paraphrase_sentence(piece, tone) for piece in split_sentences(text)
+    ]
+    return postprocess_text(' '.join(rewritten_parts))
+# ----------------------
+# Semantic Similarity Function
+# ----------------------
+def semantic_similarity(text1, text2):
+    """Return the cosine similarity between the embeddings of two texts.
+
+    Each text is encoded separately with ``similarity_model``; the result is
+    the scalar from ``util.pytorch_cos_sim`` converted to a Python float.
+    """
+    first, second = (
+        similarity_model.encode(passage, convert_to_tensor=True)
+        for passage in (text1, text2)
+    )
+    return util.pytorch_cos_sim(first, second).item()
+# ----------------------
+# Real AI Detection (Winston AI API)
+# ----------------------
+def check_ai_score(text):
+    """Ask the Winston AI endpoint to score *text*.
+
+    The key and URL are read from ``config``. The response's ``score`` field is
+    interpreted as a 0-100 "human" percentage and converted to an AI
+    probability via ``1 - score / 100``.
+
+    Returns:
+        ``(ai_prob, None)`` on success, or ``(None, message)`` when the key is
+        missing, the HTTP status is not 200, the response has no score, or any
+        exception is raised while calling the service.
+    """
+    api_key = config.WINSTON_AI_API_KEY
+    api_url = config.WINSTON_AI_API_URL
+    if not api_key:
+        return None, "No API key set. Please add your Winston AI API key to config.py."
+    request_headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+    payload = {"text": text, "sentences": False}
+    try:
+        response = requests.post(api_url, headers=request_headers, json=payload, timeout=30)
+        if response.status_code != 200:
+            return None, f"Winston AI error: {response.status_code} {response.text}"
+        score = response.json().get("score", None)
+        if score is None:
+            return None, "No score in Winston AI response."
+        return 1.0 - (score / 100.0), None
+    except Exception as e:
+        # Best-effort boundary: report any failure as a message, never raise.
+        return None, f"Winston AI exception: {str(e)}"
+# ----------------------
+# Humanization Score & Rating
+# ----------------------
+def humanization_score(sim, ai_prob):
+    """Combine similarity and AI probability into one equally weighted score.
+
+    Both inputs are inverted before averaging, so a lower similarity and a
+    lower AI probability each raise the result.
+    """
+    dissimilarity_part = (1.0 - sim) * 0.5
+    human_part = (1.0 - ai_prob) * 0.5
+    return dissimilarity_part + human_part
+def humanization_rating(score):
+    """Map a humanization score to a labelled rating string.
+
+    Scores below 0.7 are "Still AI-like", scores below 0.85 are "Acceptable",
+    and everything else is "Highly Humanized". The score is appended in
+    parentheses with two decimals.
+    """
+    # Exclusive upper bounds, checked in ascending order.
+    bands = (
+        (0.7, "⚠️ Still AI-like"),
+        (0.85, "👍 Acceptable"),
+    )
+    for upper_bound, label in bands:
+        if score < upper_bound:
+            return f"{label} ({score:.2f})"
+    return f"✅ Highly Humanized ({score:.2f})"
+# ----------------------
+# Main Processing Function
+# ----------------------
+def process(text, tone):
+    """Run the full humanize pipeline for one UI/API request.
+
+    Steps: score the input with the AI detector, paraphrase it, score the
+    paraphrase, measure semantic similarity between input and paraphrase, then
+    derive the humanization score and rating.
+
+    Args:
+        text: Source text from the input box. ``None`` or whitespace-only
+            input short-circuits to an empty result.
+        tone: Tone name forwarded to ``paraphrase``.
+
+    Returns:
+        A 6-tuple ``(output_text, ai_scores_text, similarity, rating_text,
+        humanization_percent, "")``. When detection or paraphrasing fails, the
+        error message is placed in the first or second slot and the numeric
+        slots are 0.0.
+    """
+    # Check for None as well as blank text: calling .strip() on None would
+    # raise AttributeError before any result could be returned.
+    if not text or not text.strip():
+        return "", "", 0.0, "", 0.0, ""
+    pre_ai_prob, pre_err = check_ai_score(text)
+    if pre_ai_prob is None:
+        return "", f"AI Detection Error: {pre_err}", 0.0, "", 0.0, ""
+    try:
+        paraphrased = paraphrase(text, tone)
+    except Exception as e:
+        # UI boundary: show the failure in the output box instead of letting
+        # the handler crash.
+        return f"[Paraphrasing error: {str(e)}]", "", 0.0, "", 0.0, ""
+    post_ai_prob, post_err = check_ai_score(paraphrased)
+    if post_ai_prob is None:
+        # Keep the paraphrase visible even though it could not be scored.
+        return paraphrased, f"AI Detection Error: {post_err}", 0.0, "", 0.0, ""
+    sim = semantic_similarity(text, paraphrased)
+    score = humanization_score(sim, post_ai_prob)
+    rating = humanization_rating(score)
+    ai_score_str = f"Pre: {100*(1-pre_ai_prob):.1f}% human | Post: {100*(1-post_ai_prob):.1f}% human"
+    return (
+        paraphrased,
+        ai_score_str,
+        sim,
+        rating,
+        score * 100,  # shown as a percentage
+        ""
+    )
+# ----------------------
+# Gradio UI
+# ----------------------
+# Theme derived from Gradio's Base theme with blue accents on slate neutrals.
+custom_theme = grthemes.Base(
+    primary_hue="blue",
+    secondary_hue="blue",
+    neutral_hue="slate"
+)
+# Page layout: header, one row with two columns (inputs first, results second),
+# then a footer. Components render in the order they are created here.
+with gr.Blocks(theme=custom_theme, title="AI Humanizer - Made by Taha") as demo:
+    gr.Markdown("""
+    # 🧠 AI Humanizer
+    <div style='display:flex;justify-content:space-between;align-items:center;'>
+        <span style='font-size:1.2em;color:#7bb1ff;'>Rewrite AI text to sound 100% human</span>
+        <span style='font-weight:bold;color:#7bb1ff;'>Made by Taha</span>
+    </div>
+    """, elem_id="header")
+    with gr.Row():
+        # First column: source text, tone choice, and the trigger button.
+        with gr.Column():
+            text_in = gr.Textbox(label="Paste AI-generated text here", lines=8, placeholder="Paste your text...", elem_id="input-box")
+            tone = gr.Dropdown(["Academic", "Casual", "Friendly", "Stealth"], value="Stealth", label="Tone Selector")
+            btn = gr.Button("Humanize", elem_id="humanize-btn")
+        # Second column: read-only results filled in by process().
+        with gr.Column():
+            text_out = gr.Textbox(label="Humanized Output", lines=8, interactive=False, elem_id="output-box")
+            ai_scores = gr.Markdown("", elem_id="ai-scores")
+            sim_score = gr.Number(label="Similarity (0=very different, 1=very similar)", interactive=False)
+            rating = gr.Markdown("", elem_id="rating")
+            human_score = gr.Number(label="Humanization Score (%)", interactive=False)
+    # process() returns six values; the sixth goes to a hidden textbox created
+    # inline so the output count matches the returned tuple.
+    btn.click(
+        process,
+        inputs=[text_in, tone],
+        outputs=[text_out, ai_scores, sim_score, rating, human_score, gr.Textbox(visible=False)],
+        api_name="humanize"
+    )
+    gr.Markdown("""
+    <div style='text-align:center;color:#7bb1ff;margin-top:2em;'>
+        <b>Made by Taha</b> | Free for unlimited use | Optimized for students and creators
+    </div>
+    """, elem_id="footer")
 demo.launch()