Bajiyo committed
Commit 51db1ee · verified · 1 Parent(s): af8878c

Update app.py

Files changed (1)
  1. app.py +41 -23
app.py CHANGED
@@ -2,34 +2,52 @@ import gradio as gr
 from huggingface_hub import from_pretrained_keras
 
 # Load the model from Hugging Face Hub
-model = from_pretrained_keras("Bajiyo/Malayalam_transliteration")
-
-# Function to preprocess text (replace with model-specific preprocessing if needed)
-def preprocess_text(input_text):
-    # Assuming character-level model: convert text to sequence of integer indices
-    # Replace with your specific preprocessing steps based on the model's requirements
-    # You might need tokenization or other transformations
-    # ...
-    return preprocessed_text
-
-def transliterate(input_text):
-    # Preprocess the input text
-    preprocessed_text = preprocess_text(input_text)
-
-    # Make predictions using the model
-    predictions = model.predict(preprocessed_text)
-
-    # Post-process the predictions if needed (replace with your logic)
-    output_text = predictions  # Assuming model outputs transliteration directly
-
-    return output_text
-
-textbox = gr.inputs.Textbox(label="Enter Malayalam Text")
-
-demo = gr.Interface(fn=transliterate,
-                    inputs=textbox,
-                    outputs=gr.outputs.Textbox(label="Transliteration to English"),
-                    title="Malayalam to English Transliteration"
-                    )
-
-demo.launch()
+model = from_pretrained_keras("Bajiyo/ml-en-transliteration")
+
+import gradio as gr
+
+# Load the saved model and tokenizers
+import json
+from keras.models import load_model
+from keras.preprocessing.sequence import pad_sequences
+
+# Load the saved model
+model_path = "/content/drive/MyDrive/hugging_final/model"
+model = load_model(model_path)
+
+# Load tokenizer configurations
+source_tokenizer_path = "https://huggingface.co/Bajiyo/ml-en-transliteration/blob/main/source_tokenizer.json"
+with open(source_tokenizer_path, "r") as f:
+    source_tokenizer_config = json.load(f)
+
+target_tokenizer_path = "https://huggingface.co/Bajiyo/ml-en-transliteration/blob/main/target_tokenizer.json"
+with open(target_tokenizer_path, "r") as f:
+    target_tokenizer_config = json.load(f)
+
+# Reconstruct tokenizers
+from keras.preprocessing.text import tokenizer_from_json
+source_tokenizer = tokenizer_from_json(source_tokenizer_config)
+target_tokenizer = tokenizer_from_json(target_tokenizer_config)
+
+# Define the maximum sequence length
+max_seq_length = 50
+
+# Function to predict transliteration
+def predict_transliteration(input_text):
+    # Preprocess the input text
+    input_sequence = source_tokenizer.texts_to_sequences([input_text])
+    input_sequence_padded = pad_sequences(input_sequence, maxlen=max_seq_length, padding='post')
+
+    # Generate predictions
+    predicted_sequence = model.predict(input_sequence_padded)
+
+    # Decode the predicted sequence
+    predicted_text = "".join(target_tokenizer.index_word[i] for i in np.argmax(predicted_sequence, axis=-1)[0] if i != 0)
+
+    return predicted_text
+
+# Create a Gradio interface
+input_textbox = gr.inputs.Textbox(lines=2, label="Enter Malayalam text")
+output_textbox = gr.outputs.Textbox(label="Predicted Transliteration")
+
+gr.Interface(fn=predict_transliteration, inputs=input_textbox, outputs=output_textbox, title="Malayalam Transliteration", description="Enter Malayalam text to get its transliteration in English.").launch()
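
As committed, this version of app.py is unlikely to run on a Space as-is: np.argmax is used without import numpy as np, load_model points at a Colab Drive path (/content/drive/...) that only exists in the author's Colab session, open() is called on huggingface.co blob URLs (it accepts only local file paths), and the gr.inputs / gr.outputs namespaces are gone in current Gradio (4.x) releases. Below is a minimal sketch of how the same pipeline could be wired up, assuming the Bajiyo/ml-en-transliteration repo holds source_tokenizer.json and target_tokenizer.json at its root (as the blob URLs suggest), that those files contain the raw string produced by Tokenizer.to_json(), and that the Keras model loads via from_pretrained_keras; the load_tokenizer helper and REPO_ID constant are hypothetical names, and the keras.preprocessing imports assume Keras 2.x (tf.keras), as in the commit.

# Sketch of a corrected app.py (hypothetical, not the committed code)
import json

import numpy as np
import gradio as gr
from huggingface_hub import from_pretrained_keras, hf_hub_download
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import tokenizer_from_json

REPO_ID = "Bajiyo/ml-en-transliteration"  # assumed to hold the model and tokenizer JSONs

# Load the Keras model straight from the Hub (no Drive checkpoints on a Space)
model = from_pretrained_keras(REPO_ID)

def load_tokenizer(filename):
    # Hypothetical helper: download the JSON from the Hub instead of open()-ing a URL.
    # Assumes the file is the raw string produced by Tokenizer.to_json().
    local_path = hf_hub_download(repo_id=REPO_ID, filename=filename)
    with open(local_path, "r", encoding="utf-8") as f:
        return tokenizer_from_json(f.read())

source_tokenizer = load_tokenizer("source_tokenizer.json")
target_tokenizer = load_tokenizer("target_tokenizer.json")

max_seq_length = 50  # same value used in the commit

def predict_transliteration(input_text):
    # Tokenize and pad the input, as in the committed code
    seq = source_tokenizer.texts_to_sequences([input_text])
    padded = pad_sequences(seq, maxlen=max_seq_length, padding="post")

    # Greedy decode: argmax over the output vocabulary at each timestep, skipping padding (index 0)
    preds = model.predict(padded)
    indices = np.argmax(preds, axis=-1)[0]
    return "".join(target_tokenizer.index_word.get(int(i), "") for i in indices if i != 0)

# Current Gradio components (gr.inputs / gr.outputs no longer exist)
demo = gr.Interface(
    fn=predict_transliteration,
    inputs=gr.Textbox(lines=2, label="Enter Malayalam text"),
    outputs=gr.Textbox(label="Predicted Transliteration"),
    title="Malayalam Transliteration",
    description="Enter Malayalam text to get its transliteration in English.",
)

demo.launch()

Loading the model once through from_pretrained_keras (rather than a second time with load_model) also removes the need to ship a Drive checkpoint with the Space; the inference and interface logic otherwise mirrors the committed code.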