"""Gradio app: Malayalam -> English name transliteration via a seq2seq Keras model."""

import json

import numpy as np
from gradio import Interface
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json

# Model and tokenizer loading paths.
# NOTE(review): these must be LOCAL file paths — open()/load_model cannot read
# HTTPS URLs. Download the files from the Hugging Face repo first
# (e.g. with huggingface_hub.hf_hub_download) and point these at the copies.
model_path = "Bajiyo/mal_en_transliteration"
source_tokenizer_config_path = "source_tokenizer_config.json"
target_tokenizer_config_path = "target_tokenizer_config.json"

# Maximum sequence length the model was trained on; must be defined before use.
MAX_SEQ_LENGTH = 49  # Replace with the actual training-time value.

# Load the trained seq2seq model.
model = load_model(model_path)

# Load tokenizers. Keras has no `Tokenizer.from_config`; the supported API is
# `tokenizer_from_json`, which takes the JSON *string* produced by
# `Tokenizer.to_json()`.
with open(source_tokenizer_config_path, "r", encoding="utf-8") as f:
    source_tokenizer = tokenizer_from_json(f.read())
with open(target_tokenizer_config_path, "r", encoding="utf-8") as f:
    target_tokenizer = tokenizer_from_json(f.read())


def preprocess_malayalam_name(malayalam_name):
    """Normalize raw user input before tokenization.

    Currently trims surrounding whitespace only.
    TODO(review): add punctuation / special-character handling to match
    whatever preprocessing was applied at training time.
    """
    return malayalam_name.strip()


def transliterate(malayalam_name):
    """Transliterate a Malayalam name to its English (Latin-script) form.

    Args:
        malayalam_name: Raw Malayalam text entered by the user.

    Returns:
        The model's predicted transliteration as a string.
    """
    processed_name = preprocess_malayalam_name(malayalam_name)

    # Tokenize the input into integer ids.
    sequence = source_tokenizer.texts_to_sequences([processed_name])[0]

    # Pad to the fixed length the model expects.
    padded_sequence = pad_sequences([sequence], maxlen=MAX_SEQ_LENGTH, padding="post")

    # prediction has shape (timesteps, vocab_size) for the single input.
    prediction = model.predict(padded_sequence)[0]

    # Take the argmax PER TIMESTEP (axis=-1) to get one token id per position;
    # a plain np.argmax(prediction) would collapse the whole matrix to one scalar.
    token_ids = np.argmax(prediction, axis=-1)

    # Detokenize the predicted id sequence back into text.
    transliterated_name = target_tokenizer.sequences_to_texts([token_ids.tolist()])[0]

    return transliterated_name


interface = Interface(
    fn=transliterate,
    inputs="text",
    outputs="text",
    title="Malayalam to English Transliteration",
    description="Enter a Malayalam name and get the transliterated English version.",
)

if __name__ == "__main__":
    interface.launch()