File size: 2,860 Bytes
ec5f9eb
ce70399
 
ec5f9eb
ce70399
 
ec5f9eb
ce70399
 
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
 
 
 
 
ec5f9eb
ce70399
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
 
 
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
ec5f9eb
ce70399
 
 
ec5f9eb
 
ce70399
ec5f9eb
ce70399
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
import torch
from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer

# Device configuration
# Prefer GPU when available. Note: only the T5 model below is moved to this
# device explicitly; the two pipelines default to CPU unless given a device arg.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load models
# BERT for masked-word prediction (used by fill_mask_function).
fill_mask = pipeline("fill-mask", model="bert-base-uncased")
# Seq2seq grammar-correction model (used by grammar_correction_function).
corrector = pipeline("text2text-generation", model="pszemraj/grammar-synthesis-small")
# T5 fine-tuned for UK -> US English conversion (used by uk_to_us_function).
tokenizer = T5Tokenizer.from_pretrained("EnglishVoice/t5-base-uk-to-us-english")
model = T5ForConditionalGeneration.from_pretrained("EnglishVoice/t5-base-uk-to-us-english").to(device)

# Fill Mask Function
def fill_mask_function(text):
    """Predict the word hidden behind a single underscore placeholder.

    The first "_" in *text* is swapped for BERT's [MASK] token and the
    fill-mask pipeline is asked for candidate completions.  Only the first
    underscore is replaced: inserting multiple [MASK] tokens makes the
    pipeline return nested lists, which would break the formatting below.

    Args:
        text: Input sentence containing a "_" placeholder.

    Returns:
        Newline-separated predicted sentences with confidence scores, or an
        instructional message when no underscore / no valid prediction exists.
    """
    if "_" not in text:
        return "Please add an underscore (_) where you want the mask to be predicted."
    # Replace only the FIRST underscore; any extra underscores stay literal.
    text_with_mask = text.replace("_", "[MASK]", 1)
    predictions = fill_mask(text_with_mask)
    # Drop sub-word / punctuation tokens so only clean word predictions remain.
    filtered = [p for p in predictions if p['token_str'].isalnum()]
    if not filtered:
        return "No valid predictions."
    return "\n".join([f"{p['sequence']} (Score: {p['score']:.4f})" for p in filtered])

# Grammar Correction Function
def grammar_correction_function(text):
    """Run the grammar-synthesis model over *text* and return the corrected string."""
    result = corrector(text)[0]
    return result['generated_text']

# UK to US English Conversion
def uk_to_us_function(text):
    """Translate British English *text* to American English with the T5 model.

    Args:
        text: British-English input sentence.

    Returns:
        The American-English translation, or an "Error: ..." message if
        tokenization/generation fails (a string is always returned so the
        Gradio UI has something to display).
    """
    try:
        # The model was fine-tuned with this task prefix.
        input_text = f"UK to US: {text}"
        encoding = tokenizer.encode_plus(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        input_ids = encoding["input_ids"].to(device)
        attention_mask = encoding["attention_mask"].to(device)

        # Inference only: disable autograd so no computation graph is built
        # and activation memory is not wasted on a backward pass we never run.
        with torch.no_grad():
            output_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=150,
                num_beams=5,
                early_stopping=True
            )
        result = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        return result
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {str(e)}"

# Interface Function
def interface_function(choice, text):
    """Route the UI request to the handler matching *choice*.

    Args:
        choice: One of the radio-button labels defined in the Gradio UI.
        text: The user's input text, forwarded untouched to the handler.

    Returns:
        The selected handler's output string, or None for an unknown choice.
    """
    if choice == "UK to US English":
        return uk_to_us_function(text)
    if choice == "Grammar Correction":
        return grammar_correction_function(text)
    if choice == "Fill Mask":
        return fill_mask_function(text)
    return None

# Gradio Interface
# Single-page app: a radio button selects the task and one shared textbox
# feeds whichever handler interface_function dispatches to.
iface = gr.Interface(
    fn=interface_function,
    inputs=[
        gr.Radio(["Fill Mask", "Grammar Correction", "UK to US English"], label="Choose Functionality"),
        gr.Textbox(lines=3, placeholder="Enter your text here...", label="Input Text")
    ],
    outputs=gr.Textbox(label="Output Result"),
    title="Language Processing App",
    description="Choose one of the functionalities and provide input text. Supported tasks:\n- Fill Mask: Predict missing words.\n- Grammar Correction: Correct grammatical errors.\n- UK to US English: Convert British English to American English."
)

# Launch Interface
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()