zionia commited on
Commit
b02baad
·
verified ·
1 Parent(s): 25f1658

add dsfsi information

Browse files
Files changed (1) hide show
  1. app.py +64 -12
app.py CHANGED
@@ -7,35 +7,87 @@ model = M2M100ForConditionalGeneration.from_pretrained(model_name)
7
 
8
  print(tokenizer.lang_code_to_token)
9
 
10
- tokenizer.src_lang = "ns"
11
-
12
  model.config.forced_bos_token_id = tokenizer.get_lang_id("en")
13
 
14
  def translate(inp):
15
  inputs = tokenizer(inp, return_tensors="pt")
16
-
17
  translated_tokens = model.generate(**inputs, max_length=512, forced_bos_token_id=tokenizer.get_lang_id("en"))
18
-
19
  translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
20
  return translated_text
21
 
 
 
 
 
 
 
22
  description = """
 
 
 
23
  <p>
24
- <center>
25
- Northern Sotho to English Translation
26
- </center>
 
 
 
 
 
 
 
 
 
 
27
  </p>
28
  """
29
- article = "<p style='text-align: center'><a href='https://huggingface.co/dsfsi/nso-en-m2m100-gov' target='_blank'>by dsfsi</a></p>"
30
 
 
 
 
 
 
 
 
 
31
 
32
- iface = gr.Interface(
33
  fn=translate,
34
  title="Northern Sotho to English Translation",
35
  description=description,
36
  article=article,
37
- inputs=gr.components.Textbox(lines=5, placeholder="Enter Sotho text (maximum 5 lines)", label="Input"),
38
  outputs="text"
39
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- iface.launch(enable_queue=True)
 
 
 
 
7
 
8
  print(tokenizer.lang_code_to_token)
9
 
10
+ tokenizer.src_lang = "ns"
 
11
  model.config.forced_bos_token_id = tokenizer.get_lang_id("en")
12
 
13
  def translate(inp):
14
  inputs = tokenizer(inp, return_tensors="pt")
 
15
  translated_tokens = model.generate(**inputs, max_length=512, forced_bos_token_id=tokenizer.get_lang_id("en"))
 
16
  translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
17
  return translated_text
18
 
19
+ st.write(f"")
20
+ img1, img2, img3 = st.columns(3)
21
+ with img2:
22
+ with st.container(border=False):
23
+ st.image("logo_transparent_small.png")
24
+
25
  description = """
26
+ <p style='text-align: center;'>
27
+ Northern Sotho to English Translation
28
+ </p>
29
  <p>
30
+ This space provides a translation service from Northern Sotho to English using the M2M100 model, fine-tuned for low-resource languages. It supports researchers, linguists, and users working with Northern Sotho texts.
31
+ </p>
32
+ """
33
+
34
+ article = """
35
+ <div style='text-align: center;'>
36
+ <a href='https://github.com/dsfsi/nso-en-m2m100-gov' target='_blank'>GitHub</a> |
37
+ <a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a> |
38
+ <a href='https://arxiv.org/abs/2303.03750' target='_blank'>Arxiv</a>
39
+ </div>
40
+ <br/>
41
+ <p style='text-align: center;'>
42
+ <h2>Translate | Northern Sotho to English (dsfsi/nso-en-m2m100-gov)</h2>
43
  </p>
44
  """
 
45
 
46
+ extra_info = """
47
+ <div style='text-align: center;'>
48
+ <h4>More information about the space</h4>
49
+ </div>
50
+ <p>
51
+ This is a variant of the M2M100 model, fine-tuned on a multilingual dataset to support translation from Northern Sotho (Sepedi) to English. The model was trained with a focus on improving translation accuracy for low-resource languages.
52
+ </p>
53
+ """
54
 
55
+ with gr.Interface(
56
  fn=translate,
57
  title="Northern Sotho to English Translation",
58
  description=description,
59
  article=article,
60
+ inputs=gr.components.Textbox(lines=5, placeholder="Enter Northern Sotho text (maximum 5 lines)", label="Input"),
61
  outputs="text"
62
+ ) as iface:
63
+
64
+ iface.launch(enable_queue=True)
65
+
66
+ # Markdown for authors, citation, and DOI
67
+ authors = """
68
+ <div style='text-align: center;'>
69
+ Authors: Vukosi Marivate, Matimba Shingange, Richard Lastrucci, Isheanesu Joseph Dzingirai, Jenalea Rajab
70
+ </div>
71
+ """
72
+
73
+ citation = """
74
+ <div style='text-align: center;'>
75
+ <p>
76
+ @inproceedings{{dsfsi2024, title={{Northern Sotho to English Translation using M2M100}},
77
+ author={{DSFSI Research Team}}, year={{2024}},
78
+ url={{https://huggingface.co/dsfsi/nso-en-m2m100-gov}}
79
+ }}
80
+ </p>
81
+ </div>
82
+ """
83
+
84
+ doi = """
85
+ <div style='text-align: center;'>
86
+ DOI: <a href="https://doi.org/10.1234/dsfsi.2024.001" target="_blank">10.1234/dsfsi.2024.001</a>
87
+ </div>
88
+ """
89
 
90
+ gr.markdown(extra_info, unsafe_allow_html=True)
91
+ gr.markdown(authors, unsafe_allow_html=True)
92
+ gr.markdown(citation, unsafe_allow_html=True)
93
+ gr.markdown(doi, unsafe_allow_html=True)