File size: 2,860 Bytes
ec5f9eb
ce70399
 
ec5f9eb
ce70399
 
ec5f9eb
ce70399
 
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
 
 
 
 
ec5f9eb
ce70399
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
 
 
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
 
 
ec5f9eb
ce70399
 
 
 
 
 
ec5f9eb
ce70399
 
 
ec5f9eb
 
ce70399
ec5f9eb
ce70399
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
import torch
from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer

# Device configuration
# Prefer GPU when available. Note: only the T5 model below is moved to this
# device explicitly; the two pipelines default to CPU unless given a device arg.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load models
# BERT for masked-word prediction (used by fill_mask_function).
fill_mask = pipeline("fill-mask", model="bert-base-uncased")
# Seq2seq grammar-correction model (used by grammar_correction_function).
corrector = pipeline("text2text-generation", model="pszemraj/grammar-synthesis-small")
# T5 fine-tuned for UK -> US English conversion (used by uk_to_us_function).
tokenizer = T5Tokenizer.from_pretrained("EnglishVoice/t5-base-uk-to-us-english")
model = T5ForConditionalGeneration.from_pretrained("EnglishVoice/t5-base-uk-to-us-english").to(device)

# Fill Mask Function
def fill_mask_function(text):
    """Predict the word hidden behind a single underscore placeholder.

    The first "_" in *text* is swapped for BERT's [MASK] token and the
    fill-mask pipeline is asked for candidate completions.  Only the first
    underscore is replaced: inserting multiple [MASK] tokens makes the
    pipeline return nested lists, which would break the formatting below.

    Args:
        text: Input sentence containing a "_" placeholder.

    Returns:
        Newline-separated predicted sentences with confidence scores, or an
        instructional message when no underscore / no valid prediction exists.
    """
    if "_" not in text:
        return "Please add an underscore (_) where you want the mask to be predicted."
    # Replace only the FIRST underscore; any extra underscores stay literal.
    text_with_mask = text.replace("_", "[MASK]", 1)
    predictions = fill_mask(text_with_mask)
    # Drop sub-word / punctuation tokens so only clean word predictions remain.
    filtered = [p for p in predictions if p['token_str'].isalnum()]
    if not filtered:
        return "No valid predictions."
    return "\n".join([f"{p['sequence']} (Score: {p['score']:.4f})" for p in filtered])

# Grammar Correction Function
def grammar_correction_function(text):
    """Run the grammar-synthesis model over *text* and return the corrected string."""
    result = corrector(text)[0]
    return result['generated_text']

# UK to US English Conversion
def uk_to_us_function(text):
    """Translate British English *text* to American English with the T5 model.

    Args:
        text: British-English input sentence.

    Returns:
        The American-English translation, or an "Error: ..." message if
        tokenization/generation fails (a string is always returned so the
        Gradio UI has something to display).
    """
    try:
        # The model was fine-tuned with this task prefix.
        input_text = f"UK to US: {text}"
        encoding = tokenizer.encode_plus(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        input_ids = encoding["input_ids"].to(device)
        attention_mask = encoding["attention_mask"].to(device)

        # Inference only: disable autograd so no computation graph is built
        # and activation memory is not wasted on a backward pass we never run.
        with torch.no_grad():
            output_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=150,
                num_beams=5,
                early_stopping=True
            )
        result = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        return result
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {str(e)}"

# Interface Function
def interface_function(choice, text):
    """Route the UI request to the handler matching *choice*.

    Args:
        choice: One of the radio-button labels defined in the Gradio UI.
        text: The user's input text, forwarded untouched to the handler.

    Returns:
        The selected handler's output string, or None for an unknown choice.
    """
    if choice == "UK to US English":
        return uk_to_us_function(text)
    if choice == "Grammar Correction":
        return grammar_correction_function(text)
    if choice == "Fill Mask":
        return fill_mask_function(text)
    return None

# Gradio Interface
# Single-page app: a radio button selects the task and one shared textbox
# feeds whichever handler interface_function dispatches to.
iface = gr.Interface(
    fn=interface_function,
    inputs=[
        gr.Radio(["Fill Mask", "Grammar Correction", "UK to US English"], label="Choose Functionality"),
        gr.Textbox(lines=3, placeholder="Enter your text here...", label="Input Text")
    ],
    outputs=gr.Textbox(label="Output Result"),
    title="Language Processing App",
    description="Choose one of the functionalities and provide input text. Supported tasks:\n- Fill Mask: Predict missing words.\n- Grammar Correction: Correct grammatical errors.\n- UK to US English: Convert British English to American English."
)

# Launch Interface
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()