"""Gradio app: Malayalam -> English name transliteration via a seq2seq Keras model."""

import json

import numpy as np
from gradio import Interface
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json

# Model and tokenizer loading paths.
# NOTE(review): these must be LOCAL file paths — open()/load_model cannot read
# HTTPS URLs. Download the files from the Hugging Face repo first
# (e.g. with huggingface_hub.hf_hub_download) and point these at the copies.
model_path = "Bajiyo/mal_en_transliteration"
source_tokenizer_config_path = "source_tokenizer_config.json"
target_tokenizer_config_path = "target_tokenizer_config.json"

# Maximum sequence length the model was trained on; must be defined before use.
MAX_SEQ_LENGTH = 49  # Replace with the actual training-time value.

# Load the trained seq2seq model.
model = load_model(model_path)

# Load tokenizers. Keras has no `Tokenizer.from_config`; the supported API is
# `tokenizer_from_json`, which takes the JSON *string* produced by
# `Tokenizer.to_json()`.
with open(source_tokenizer_config_path, "r", encoding="utf-8") as f:
    source_tokenizer = tokenizer_from_json(f.read())
with open(target_tokenizer_config_path, "r", encoding="utf-8") as f:
    target_tokenizer = tokenizer_from_json(f.read())


def preprocess_malayalam_name(malayalam_name):
    """Normalize raw user input before tokenization.

    Currently trims surrounding whitespace only.
    TODO(review): add punctuation / special-character handling to match
    whatever preprocessing was applied at training time.
    """
    return malayalam_name.strip()


def transliterate(malayalam_name):
    """Transliterate a Malayalam name to its English (Latin-script) form.

    Args:
        malayalam_name: Raw Malayalam text entered by the user.

    Returns:
        The model's predicted transliteration as a string.
    """
    processed_name = preprocess_malayalam_name(malayalam_name)

    # Tokenize the input into integer ids.
    sequence = source_tokenizer.texts_to_sequences([processed_name])[0]

    # Pad to the fixed length the model expects.
    padded_sequence = pad_sequences([sequence], maxlen=MAX_SEQ_LENGTH, padding="post")

    # prediction has shape (timesteps, vocab_size) for the single input.
    prediction = model.predict(padded_sequence)[0]

    # Take the argmax PER TIMESTEP (axis=-1) to get one token id per position;
    # a plain np.argmax(prediction) would collapse the whole matrix to one scalar.
    token_ids = np.argmax(prediction, axis=-1)

    # Detokenize the predicted id sequence back into text.
    transliterated_name = target_tokenizer.sequences_to_texts([token_ids.tolist()])[0]

    return transliterated_name


interface = Interface(
    fn=transliterate,
    inputs="text",
    outputs="text",
    title="Malayalam to English Transliteration",
    description="Enter a Malayalam name and get the transliterated English version.",
)

if __name__ == "__main__":
    interface.launch()