Spaces:

flax-community
/

TamilLanguageDemos

Runtime error

App Files Community

Abinaya Mahendiran committed on Jul 17, 2021

Commit

36338f2

1 Parent(s): ebee998

Updated app

Browse files

Files changed (3) hide show

app.py +61 -15
config.json +5 -3
images/tamil_logo.jpg +0 -0

app.py CHANGED Viewed

@@ -3,35 +3,81 @@
 """
 # Install necessary libraries
-from transformers import AutoTokenizer, GPT2LMHeadModel, pipeline
 import streamlit as st
 from pprint import pprint
 import json
 # Read the config
 with open("config.json") as f:
-    cfg = json.loads(f.read())
 # Set page layout
-st.set_page_config(layout="wide")
 # Load the model
 @st.cache(allow_output_mutation=True)
-def load_model():
-    tokenizer = AutoTokenizer.from_pretrained(cfg["model_name_or_path"])
-    model = GPT2LMHeadModel.from_pretrained(cfg["model_name_or_path"])
-    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
-    return generator, tokenizer
-with st.spinner('Loading model...'):
-    generator, tokenizer = load_model()
-# st.image("images/chef-transformer.png", width=400)
 st.header("Tamil Language Demos")
 st.markdown(
     "This demo uses [GPT2 trained on Oscar dataset](https://huggingface.co/flax-community/gpt-2-tamil) "
-    "to show language generation and other downstream tasks"
 )
-img = st.sidebar.image("images/tamil_logo.png", width=100)
-add_text_sidebar = st.sidebar.title("Select demo:")
-sampling_mode = st.sidebar.selectbox("select a demo", index=0, options=["Text Generation", "Text Classification"])

 """
 # Install necessary libraries
+from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
 import streamlit as st
 from pprint import pprint
 import json
 # Read the config
 with open("config.json") as f:
+    config = json.loads(f.read())
 # Set page layout
+st.set_page_config(
+        page_title="Tamil Language Models",
+        layout="wide",
+        initial_sidebar_state="expanded"
+    )
 # Load the model
 @st.cache(allow_output_mutation=True)
+def load_model(model_name):
+    with st.spinner('Waiting for the model to load.....'):
+        model = AutoModelWithLMHead.from_pretrained(model_name)
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+    st.success('Model loaded!!')
+    return model, tokenizer
+# Side bar
+img = st.sidebar.image("images/tamil_logo.jpg", width=380)
+# Choose the model based on selection
+page = st.sidebar.selectbox("Model", config["models"])
+data = st.sidebar.selectbox("Data", config[page])
+# Main page
 st.header("Tamil Language Demos")
 st.markdown(
     "This demo uses [GPT2 trained on Oscar dataset](https://huggingface.co/flax-community/gpt-2-tamil) "
+    "and [GPT2 trained on Oscar & Indic Corpus dataset] (https://huggingface.co/abinayam/gpt-2-tamil) "
+    "to show language generation"
 )
+if page == 'Text Generation' and data == 'Oscar':
+    st.title('Tamil text generation with GPT2')
+    st.markdown('A simple demo using gpt-2-tamil model trained on Oscar data')
+    model, tokenizer = load_model(config[data])
+    # Set default options
+    seed = st.text_input('Starting text', 'அகர முதல எழுதெல்லம்')
+    #seq_num = st.number_input('Number of sentences to generate ', 1, 20, 5)
+    max_len = st.number_input('Length of the sentence', 5, 300, 100)
+    gen_bt = st.button('Generate')
+    if gen_bt:
+        try:
+            with st.spinner('Generating...'):
+                generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
+                seqs = generator(seed, max_length=max_len) # num_return_sequences=seq_num)
+            st.write(seqs)
+        except Exception as e:
+            st.exception(f'Exception: {e}')
+elif page == 'Text Generation' and data == "Oscar + Indic Corpus":
+    st.title('Tamil text generation with GPT2')
+    st.markdown('A simple demo using gpt-2-tamil model trained on Oscar data')
+    model, tokenizer = load_model(config[data])
+    # Set default options
+    seed = st.text_input('Starting text', 'அகர முதல எழுதெல்லம்')
+    #seq_num = st.number_input('Number of sentences to generate ', 1, 20, 5)
+    max_len = st.number_input('Length of the sentence', 5, 300, 100)
+    gen_bt = st.button('Generate')
+    if gen_bt:
+        try:
+            with st.spinner('Generating...'):
+                generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
+                seqs = generator(seed, max_length=max_len) #num_return_sequences=seq_num)
+            st.write(seqs)
+        except Exception as e:
+            st.exception(f'Exception: {e}')
+else:
+    st.title('Tamil News classification with Finetuned GPT2')
+    st.markdown('In progress')

config.json CHANGED Viewed

@@ -1,5 +1,7 @@
 {
-    "model_name_or_path": "flax-community/gpt-2-tamil",
-    "Text Generation": ["example_1", "example_2"],
-    "Text Classification": ["example_2", "example_2"]
 }

 {
+    "models": ["Text Generation", "Text Classification"],
+    "Text Generation": ["Oscar", "Oscar + Indic Corpus"],
+    "Text Classification": ["News Data"],
+    "Oscar": "flax-community/gpt-2-tamil",
+    "Oscar + Indic Corpus": "abinayam/gpt-2-tamil"
 }

images/tamil_logo.jpg ADDED Viewed