Bajiyo's picture
Update app.py
cd395e6 verified
raw
history blame
1.87 kB
import json
import urllib.request

import numpy as np
from gradio import Interface
from tensorflow.keras.models import load_model # Assuming TensorFlow backend
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json
#from keras.preprocessing.text import Tokenizer # Assuming Keras Tokenizer
# Model and tokenizer locations (replace with your actual paths).
# NOTE(review): `load_model` takes a local file/directory path. A Hub repo id
# like the one below only works if a local directory with that name exists;
# confirm how the model artifact is supposed to be fetched.
model_path = "Bajiyo/mal_en_transliteration"
# Use `/resolve/main/` so the raw JSON file is served; `/blob/main/` returns
# the HTML file-viewer page instead of the file content.
source_tokenizer_config_path = "https://huggingface.co/Bajiyo/Malayalam_transliteration/resolve/main/source_tokenizer_config.json"
target_tokenizer_config_path = "https://huggingface.co/Bajiyo/Malayalam_transliteration/resolve/main/target_tokenizer_config.json"


def _load_tokenizer(config_path):
    """Build a Keras tokenizer from a JSON configuration file.

    Args:
        config_path: Either an ``http(s)://`` URL or a local file path that
            points at the serialized tokenizer.

    Returns:
        The tokenizer reconstructed by ``tokenizer_from_json``.

    Raises:
        OSError: If the file or URL cannot be read (``urllib.error.URLError``
            is a subclass of ``OSError``).
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # The built-in open() cannot read URLs, so remote configs are fetched
    # with urllib and local ones are read from disk.
    if config_path.startswith(("http://", "https://")):
        with urllib.request.urlopen(config_path) as response:
            raw_text = response.read().decode("utf-8")
    else:
        with open(config_path, "r", encoding="utf-8") as f:
            raw_text = f.read()

    payload = json.loads(raw_text)
    # NOTE(review): the on-disk layout is not visible from this file, so the
    # three common ways of saving a Keras tokenizer are accepted -- confirm
    # which one the repository actually uses.
    if isinstance(payload, str):
        # File written as json.dump(tokenizer.to_json()): a JSON-encoded string.
        tokenizer_json = payload
    elif "config" in payload:
        # File holds the tokenizer.to_json() document itself.
        tokenizer_json = json.dumps(payload)
    else:
        # File holds a bare tokenizer.get_config() dict; add the envelope
        # that tokenizer_from_json expects.
        tokenizer_json = json.dumps({"class_name": "Tokenizer", "config": payload})
    return tokenizer_from_json(tokenizer_json)


# Load the model
model = load_model(model_path)

# Load tokenizers
source_tokenizer = _load_tokenizer(source_tokenizer_config_path)
target_tokenizer = _load_tokenizer(target_tokenizer_config_path)
def preprocess_malayalam_name(malayalam_name):
    """Return *malayalam_name* with leading and trailing whitespace removed.

    This is a minimal placeholder so the app does not fail with a NameError.
    NOTE(review): extend it to mirror whatever preprocessing (punctuation,
    special characters, ...) was applied to the training data -- confirm.
    """
    return malayalam_name.strip()


def transliterate(malayalam_name):
    """Transliterate a Malayalam name into English with the loaded model.

    Args:
        malayalam_name: Text entered by the user. ``None``, empty, or
            whitespace-only input yields an empty string without calling
            the model.

    Returns:
        The text produced by decoding the model's prediction with the
        target tokenizer, or ``""`` for empty input.
    """
    # Nothing to transliterate: skip tokenization and inference entirely.
    if not malayalam_name:
        return ""

    # Preprocess input (e.g., handle punctuation, special characters)
    processed_name = preprocess_malayalam_name(malayalam_name)
    if not processed_name:
        return ""

    # Tokenize the input
    sequence = source_tokenizer.texts_to_sequences([processed_name])[0]

    # Pad the sequence
    padded_sequence = pad_sequences([sequence], maxlen=MAX_SEQ_LENGTH, padding="post")

    # Make prediction (first and only item of the batch)
    prediction = model.predict(padded_sequence)[0]

    # Pick the highest-scoring token at every position. Without `axis`,
    # argmax returns one flattened scalar index, which cannot be decoded as a
    # sequence. Assumes `prediction` is (timesteps, vocab_size) -- TODO confirm.
    token_ids = np.atleast_1d(np.argmax(prediction, axis=-1)).tolist()

    # Detokenize the predicted sequence
    transliterated_name = target_tokenizer.sequences_to_texts([token_ids])[0]
    return transliterated_name
# Length every tokenized input is padded to before prediction.
# The original author's note: replace with the value the model was trained on.
MAX_SEQ_LENGTH = 49

# Text-in / text-out Gradio front end around `transliterate`.
interface = Interface(
    title="Malayalam to English Transliteration",
    description="Enter a Malayalam name and get the transliterated English version.",
    fn=transliterate,
    inputs="text",
    outputs="text",
)

# Start the web app as soon as the script runs.
interface.launch()