# Import dependencies
import subprocess
import sys

import gradio as gr
import nltk
import spacy
import torch
from nltk.corpus import wordnet
from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration

# Request the NLTK resources at start-up: tokenizer data, the stopword list
# and WordNet (WordNet is what get_synonyms queries).
for _nltk_resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(_nltk_resource)

# Load the small English spaCy pipeline, installing it on first use.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raised OSError, which this script treats as "model package
    # not installed". Install it with the interpreter that is running this
    # script: a bare "python" may resolve to a different environment, or to
    # nothing at all. check=True makes a failed download raise
    # CalledProcessError right here instead of surfacing as a second, less
    # informative OSError from the retry below.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")

# Sequence-classification model and tokenizer used by detect_ai_generated.
# NOTE(review): the checkpoint name indicates a DistilBERT fine-tuned on SST-2
# (a sentiment task), not a dedicated AI-text detector -- confirm this is the
# intended model before trusting the reported score.
_DETECTOR_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(_DETECTOR_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_DETECTOR_CHECKPOINT)
model = model.to(device)

# T5 paraphrasing model and tokenizer used by humanize_text.
_PARAPHRASE_CHECKPOINT = "SRDdev/Paraphrase"
paraphrase_tokenizer = T5Tokenizer.from_pretrained(_PARAPHRASE_CHECKPOINT)
paraphrase_model = T5ForConditionalGeneration.from_pretrained(_PARAPHRASE_CHECKPOINT)
paraphrase_model = paraphrase_model.to(device)

# Function to find synonyms using WordNet via NLTK
def get_synonyms(word):
    """Return the WordNet lemma names found for *word*, without duplicates.

    Every synset WordNet returns for ``word`` is visited and the name of each
    of its lemmas is collected. Names come back in first-seen order (synset
    order, then lemma order within a synset), so the result is identical on
    every run. Collecting through a ``set`` made the order arbitrary, which
    matters because callers pick the first element.

    Args:
        word: The word to look up.

    Returns:
        A list of unique lemma-name strings, empty when WordNet yields no
        synsets for ``word``. No filtering is applied, so the looked-up word
        itself can appear in the list and names are returned exactly as
        WordNet spells them.
    """
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(
        dict.fromkeys(
            lemma.name()
            for synset in wordnet.synsets(word)
            for lemma in synset.lemmas()
        )
    )

# Replace words with synonyms using spaCy and WordNet
def replace_with_synonyms(text):
    """Swap nouns, verbs, adjectives and adverbs in *text* for a synonym.

    The text is tokenized with the module-level spaCy pipeline ``nlp``. For
    each token tagged NOUN, VERB, ADJ or ADV, the first entry returned by
    ``get_synonyms`` that differs from the token (case-insensitively) is used
    as its replacement. Tokens with another tag, or with no such entry, are
    kept unchanged.

    Args:
        text: The input string.

    Returns:
        The rewritten string. Each token keeps the whitespace that followed it
        in the input, so punctuation spacing and line breaks are preserved.
    """
    replaceable_pos = {"NOUN", "VERB", "ADJ", "ADV"}
    pieces = []
    for token in nlp(text):
        replacement = None
        # Check the tag first so WordNet is only queried for tokens that can
        # actually be replaced.
        if token.pos_ in replaceable_pos:
            original = token.text.lower()
            for candidate in get_synonyms(original):
                # Multi-word lemma names use "_" as the word separator.
                candidate = candidate.replace("_", " ")
                # Skip entries that are just the word itself.
                if candidate.lower() != original:
                    replacement = candidate
                    break

        if replacement is None:
            pieces.append(token.text_with_ws)
            continue

        if token.is_title:
            replacement = replacement.capitalize()
        # Re-attach the token's own trailing whitespace instead of forcing a
        # single space between every pair of tokens.
        pieces.append(replacement + token.whitespace_)
    return "".join(pieces)

# Score a text with the module-level sequence-classification model
def detect_ai_generated(text):
    """Return the classifier's probability for class index 1 on *text*.

    The text is tokenized (truncated to at most 512 tokens), moved to
    ``device`` and passed through ``model`` without gradient tracking. The
    logits are soft-maxed over the class dimension.

    Args:
        text: The string to score.

    Returns:
        The softmax probability of class index 1 for the single input, as a
        Python float between 0 and 1. The rest of this script reports this
        value as "AI-generated" likelihood.
        NOTE(review): what index 1 actually denotes depends on the loaded
        checkpoint's label mapping -- confirm.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    encoded = encoded.to(device)
    with torch.no_grad():
        logits = model(**encoded).logits
        class_probs = torch.softmax(logits, dim=1)
    first_row = class_probs[0]
    return first_row[1].item()

# Paraphrase text line by line with the SRDdev Paraphrase model
def humanize_text(AI_text):
    """Paraphrase each non-blank line of *AI_text* and join the results.

    The input is split on ``"\\n"``; lines that are empty or whitespace-only
    are dropped. Every remaining line is tokenized (truncated to at most 512
    tokens) and rewritten by ``paraphrase_model`` using 4-beam search, with an
    output budget of the input token count plus 20.

    Args:
        AI_text: The text to paraphrase.

    Returns:
        The paraphrased lines joined by a blank line (``"\\n\\n"``); an empty
        string when there is no non-blank line.
    """
    rewritten = []
    for line in AI_text.split("\n"):
        if not line.strip():
            continue  # nothing to paraphrase on blank lines

        encoded = paraphrase_tokenizer(
            line, return_tensors="pt", max_length=512, truncation=True
        ).to(device)
        source_ids = encoded['input_ids']
        # Allow the output to run slightly longer than the input.
        output_budget = source_ids.shape[-1] + 20

        # Inference only: no gradients needed.
        with torch.no_grad():
            generated = paraphrase_model.generate(
                source_ids,
                max_length=output_budget,
                num_beams=4,
                early_stopping=True,
                length_penalty=1.0,
                no_repeat_ngram_size=3,
            )

        # generate() returns a batch; this call only ever has one sequence.
        best_sequence = generated[0]
        rewritten.append(
            paraphrase_tokenizer.decode(best_sequence, skip_special_tokens=True)
        )
    return "\n\n".join(rewritten)

# Main function to handle the overall process
def main_function(AI_text):
    """Run the full pipeline on *AI_text* and format the result for display.

    Steps, in order:
      1. Replace words with synonyms (``replace_with_synonyms``).
      2. Score the synonym-replaced text (``detect_ai_generated``).
      3. Paraphrase the synonym-replaced text (``humanize_text``).

    Args:
        AI_text: The text entered by the user.

    Returns:
        A single string containing the score as a percentage followed by the
        paraphrased text.
    """
    text_with_synonyms = replace_with_synonyms(AI_text)
    ai_probability = detect_ai_generated(text_with_synonyms)
    humanized_text = humanize_text(text_with_synonyms)

    # detect_ai_generated returns a softmax probability in [0, 1]. The "%"
    # format type multiplies by 100 and appends the sign, so 0.97 is shown as
    # "97.00%" rather than the misleading "0.97%".
    return f"AI-Generated Content: {ai_probability:.2%}\n\nHumanized Text:\n{humanized_text}"

# Gradio UI: one text box in, one text box out, backed by main_function.
interface = gr.Interface(
    main_function,
    "textbox",
    "textbox",
    title="AI Text Humanizer with Synonym Replacement",
    description=(
        "Enter AI-generated text and get a human-written version, "
        "with synonyms replaced for more natural output. "
        "This space uses models from Hugging Face directly."
    ),
)

# Start the app as soon as this module runs; debug mode stays off for production.
interface.launch(debug=False)