"""Gradio app that labels input text as AI-generated or human-written.

Long inputs are split into token-bounded chunks, each chunk is classified
separately, and the per-chunk results are aggregated into one verdict.
"""

import gradio as gr
from transformers import AutoTokenizer, pipeline

# Hugging Face model id used for both the tokenizer and the classifier.
MODEL_NAME = 'ardavey/bert-base-ai-generated-text'

# Classifier labels as this app interprets them.
AI_LABEL = 'LABEL_1'
HUMAN_LABEL = 'LABEL_0'

# Maximum tokens per classified chunk, special tokens included.
# NOTE(review): 512 is assumed to be the model's input limit -- confirm
# against the model configuration.
MAX_TOKENS = 512

# Load the tokenizer once and share it with the pipeline so chunking and
# classification tokenize text the same way.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
classifier = pipeline(
    'text-classification',
    model=MODEL_NAME,
    tokenizer=tokenizer,
)


def preprocess_long_text(text, tokenizer, max_length=512):
    """Split *text* into pieces that each fit within *max_length* tokens.

    The classifier re-tokenizes every returned piece and adds its own
    special tokens, so room for those is reserved here: each piece holds at
    most ``max_length - <number of special tokens>`` content tokens.

    Args:
        text: The raw input string.
        tokenizer: Tokenizer providing ``encode``, ``decode`` and
            ``num_special_tokens_to_add``.
        max_length: Token budget per piece, special tokens included.

    Returns:
        A list of decoded text pieces in original order; empty when *text*
        produces no tokens.

    Raises:
        ValueError: If *max_length* leaves no room for content tokens.
    """
    reserved = tokenizer.num_special_tokens_to_add(pair=False)
    content_budget = max_length - reserved
    if content_budget <= 0:
        raise ValueError(
            f"max_length must be greater than {reserved} to leave room for "
            f"special tokens, got {max_length}"
        )

    # Tokenize WITHOUT special tokens: they are stripped again on decode and
    # would otherwise make the chunk boundaries off by one or two tokens.
    token_ids = tokenizer.encode(text, add_special_tokens=False)

    return [
        tokenizer.decode(
            token_ids[start:start + content_budget],
            skip_special_tokens=True,
        )
        for start in range(0, len(token_ids), content_budget)
    ]


def _aggregate_predictions(predictions):
    """Combine per-chunk predictions into ``(label_text, average_score)``.

    The winning class is the one with the larger summed score; the reported
    score is the mean over the chunks that voted for that class. Ties go to
    the human class.

    Raises:
        ValueError: If no prediction carries a recognised label.
    """
    ai_scores = [p['score'] for p in predictions if p['label'] == AI_LABEL]
    human_scores = [
        p['score'] for p in predictions if p['label'] == HUMAN_LABEL
    ]

    if not ai_scores and not human_scores:
        raise ValueError(
            f"Classifier returned no {AI_LABEL!r} or {HUMAN_LABEL!r} "
            "predictions"
        )

    if sum(ai_scores) > sum(human_scores):
        return "AI Generated Text", sum(ai_scores) / len(ai_scores)
    return "Human Generated Text", sum(human_scores) / len(human_scores)


def classify_text(text):
    """Classify *text* and return a one-line, human-readable verdict.

    Args:
        text: The user-supplied text; may be empty or ``None``.

    Returns:
        ``"Prediction: <label>, Average Score: <score>"`` for classifiable
        input, or a short prompt asking for input when there is nothing to
        classify.
    """
    if not text or not text.strip():
        return "Please enter some text to classify."

    chunks = [
        chunk
        for chunk in preprocess_long_text(text, tokenizer, max_length=MAX_TOKENS)
        if chunk.strip()
    ]
    if not chunks:
        # The tokenizer dropped everything (e.g. only ignorable characters).
        return "Please enter some text to classify."

    # Decoding and re-tokenizing a chunk is not guaranteed to yield the same
    # token count, so truncation is kept as a safety net against over-long
    # model inputs.
    predictions = [
        classifier(chunk, truncation=True, max_length=MAX_TOKENS)[0]
        for chunk in chunks
    ]

    label, score = _aggregate_predictions(predictions)
    return f"Prediction: {label}, Average Score: {score:.4f}"


# Build the Gradio interface.
interface = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter your text here..."),
    outputs="text",
    title="AI Generated Text Detector",
    description="Enter a text to check whether the content is written by AI or Human.",
)

# Launch only when run as a script so importing this module does not start
# a web server.
if __name__ == "__main__":
    interface.launch()