Update app.py
app.py CHANGED
@@ -10,10 +10,9 @@ st.write("Imagine you're making a chatbot that will answer very general question
 st.write("If you have very little amount of data, you could actually augment it through language models. There are regex based tools you can use but they tend to create bias due to repetitive patterns, so it's better to use language models for this case. A good model to use is a generative model fine-tuned on Quora Question Pairs dataset. This dataset consists of question pairs that are paraphrase of one another, and T5 can generate a paraphrased question given a source question.")
 st.write("Try it yourself here 👇🏻")
 
-
-
-
-generator = load_qqp()
+
+generator = pipeline("text2text-generation", model = "mrm8488/t5-small-finetuned-quora-for-paraphrasing")
+
 default_value = "How can I put out grease fire?"
 sent = st.text_area("Input", default_value, height = 10)
 outputs = generator(sent)
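For context, here is a minimal sketch of how the paraphrasing pipeline added above could be used for data augmentation on its own, outside Streamlit. The checkpoint is the one introduced in this change; the generation settings (do_sample, num_return_sequences, max_length) are illustrative assumptions rather than part of app.py.

from transformers import pipeline

# Same checkpoint as the one added in this commit.
generator = pipeline(
    "text2text-generation",
    model="mrm8488/t5-small-finetuned-quora-for-paraphrasing",
)

seed_question = "How can I put out grease fire?"

# Sample several candidates so one seed question yields several augmented examples.
# These generation arguments are assumptions, not taken from app.py.
candidates = generator(
    seed_question,
    do_sample=True,
    num_return_sequences=3,
    max_length=64,
)

for candidate in candidates:
    print(candidate["generated_text"])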
@@ -25,14 +24,11 @@ st.write("Scaling your chatbot across different languages is expensive and cumbe
 st.image("./Translation.png")
 st.write("Your English intent classification model will be between these two models, your German to English model will translate the input to English and the output will go through the intent classification model, which will classify intent and select appropriate response (which is currently in English). The response will be translated back to German, which you can do in advance and do proofreading with a native speaker or directly pass it to a from English to German language model. For this use case, I highly recommend specific translation models instead of using sequence-to-sequence multilingual models like T5. ")
 
-def load_translation():
-    model_checkpoint = "Helsinki-NLP/opus-mt-en-fr"
-    model = pipeline("translation", model=model_checkpoint)
-    return model
 
-translator =
-
-
+translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")
+
+input = st.text_area("Input", default_value, height = 10)
+outputs = translator(input)
 st.write("Translated Example:")
 translated_text = translator("How are you?")
 st.write(outputs[0]["translation_text"])
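The round trip described above (translate the user input to English, classify intent, translate the canned response back) can be sketched roughly as follows. The opus-mt-de-en and opus-mt-en-de checkpoints are assumptions for the German example; this change itself only adds the English-to-French model. classify_intent and the response table are hypothetical stand-ins for your own English intent classification model.

from transformers import pipeline

# Assumed German<->English checkpoints; the diff above adds opus-mt-en-fr instead.
de_to_en = pipeline("translation", model="Helsinki-NLP/opus-mt-de-en")
en_to_de = pipeline("translation", model="Helsinki-NLP/opus-mt-en-de")

# Hypothetical canned English responses, one per intent.
responses = {
    "greeting": "Hello! How can I help you today?",
    "fallback": "Sorry, I didn't catch that.",
}

def classify_intent(text_en: str) -> str:
    # Placeholder for your English intent classification model.
    return "greeting" if "hello" in text_en.lower() else "fallback"

user_input_de = "Hallo, wie geht es dir?"

# 1. German input -> English.
text_en = de_to_en(user_input_de)[0]["translation_text"]

# 2. Classify the intent on the English text and pick the English response.
reply_en = responses[classify_intent(text_en)]

# 3. English response -> German, ready to show to the user.
reply_de = en_to_de(reply_en)[0]["translation_text"]
print(reply_de)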
|