Update app.py
Browse files
app.py
CHANGED
@@ -517,10 +517,113 @@ def predict_tags(test_sentence):
|
|
517 |
|
518 |
predict_tags(test_sentence)
|
519 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
520 |
|
521 |
tagger = gr.Interface(
|
522 |
predict_tags,
|
523 |
gr.Textbox(placeholder="Enter sentence here..."),
|
|
|
524 |
["highlight"],
|
525 |
title="BERT Filipino Part of Speech Tagger",
|
526 |
description="Enter a text in Tagalog to classify the tags for each word. Each word to tag needs to be space separated.",
|
|
|
517 |
|
518 |
predict_tags(test_sentence)
|
519 |
|
520 |
+
def get_readme() -> str:
    """Return the model card for the tagger as a Markdown string.

    The text covers the model description, the GitHub link, and the table of
    part-of-speech tags.

    The string literal must begin on the same line as ``return``: a bare
    ``return`` followed by the literal on the next line returns ``None`` and
    leaves the text as an unreachable expression statement.
    """
    # The Markdown is kept at column 0 so no line is indented far enough to be
    # rendered as a code block.
    return """

This is a BERT Tagalog Base Uncased Part of Speech tagger fine-tuned model of [Jiang et. al.'s pre-trained bert-tagalog-base-uncased model](https://huggingface.co/GKLMIP/bert-tagalog-base-uncased).

## Model Details

### Model Description

<!-- Provide a longer summary of what this model is. -->

- **Developed by:** syke9p3, mnemoria, xenoxia, riakm
- **Shared by:** syke9p3
- **Model type:** BERT Tagalog Base Uncased
- **Languages (NLP):** Tagalog, Filipino
- **Dataset:** Sagum et. al.'s annotated Tagalog Corpora based on MGNN Tagset convention. This model was trained in 800 sentences and evaluated with 200 sentences.
- **Finetuned from model**: [Jiang et. al.'s pre-trained bert-tagalog-base-uncased model](https://huggingface.co/GKLMIP/bert-tagalog-base-uncased)

## GitHub Link
[syke9p3/bert-tagalog-pos-tagger](https://github.com/syke9p3/bert-tagalog-pos-tagger)

### Tags

| Part of Speech | Tags |
|-----------------------------------------------|------|
| **Noun** | NNC |
| Common Noun | NNC |
| Proper Noun | NNP |
| Proper Noun Abbreviation | NNPA |
| Common Noun Abbreviation | NNCA |
| **Pronoun** | PR |
| as Subject (Palagyo)/Personal Pronouns Singular | PRS |
| Personal Pronouns | PRP |
| Possessive Subject (Paari) | PRSP |
| Pointing to an Object Demonstrative/(Paturol/Pamatlig) | PRO |
| Question/Interrogative (Pananong)/Singular | PRQ |
| Question/Interrogative Plural | PRQP |
| Location (Panlunan) | PRL |
| Comparison (Panulad) | PRC |
| Found (Pahimaton) | PRF |
| Indefinite | PRI |
| **Determiner** | DT |
| Determiner (Pantukoy) for Common Noun Plural | DTC |
| Determiner (Pantukoy) for Proper Noun | DTP |
| Determiner (Pantukoy) for Proper Noun Plural | DTPP |
| Lexical Marker | LM |
| Conjunctions (Pang-ugnay) | CC, CCT, CCR, CCB, CCA |
| Ligatures (Pang-angkop) | CCP |
| Preposition (Pang-ukol) | CCU |
| **Verb (Pandiwa)** | VB |
| Neutral/Infinitive | VBW |
| Auxiliary, Modal/Pseudo-verbs | VBS |
| Existential | VBH |
| Non-existential | VBN |
| Time Past (Perfective) | VBTS |
| Time Present (Imperfective) | VBTR |
| Time Future (Contemplative) | VBTF |
| Recent past | VBTP |
| Actor Focus | VBAF |
| Object/Goal Focus | VBOF |
| Benefactive Focus | VBOB |
| Locative Focus | VBOL |
| Instrumental Focus | VBOI |
| Referential/Measurement Focus | VBRF |
| **Adjective (Pang-uri)** | JJ |
| Describing (Panlarawan) | JJD |
| Used for Comparison (same level) (Pahambing Magkatulad) | JJC |
| Comparison Comparative (more) (Palamang) | JJCC |
| Comparison Superlative (most) (Pasukdol) | JJCS |
| Comparison Negation (not quite) (Di-Magkatulad) | JJCN |
| Describing Number (Pamilang) | JJN |
| **Adverb (Pang-Abay)** | RB |
| Describing “How” (Pamaraan) | RBD |
| Number (Panggaano/Panukat) | RBN |
| Conditional (Kondisyunal) | RBK |
| Causative (Pananhi) | RBP |
| Benefactive (Benepaktibo) | RBB |
| Referential (Pangkaukulan) | RBR |
| Question (Pananong) | RBQ |
| Agree (Panang-ayon) | RBT |
| Disagree (Pananggi) | RBF |
| Frequency (Pamanahon) | RBW |
| Possibility (Pang-agam) | RBM |
| Place (Panlunan) | RBL |
| Enclitics (Paningit) | RBI |
| Interjections (Sambitla) | RBJ |
| Social Formula (Pormularyong Panlipunan) | RBS |
|**Cardinal Number (Bilang)** | CD |
| Digit, Rank, Count | CDB |
| **Topicless (Walang Paksa)** | TS |
| Foreign Words | FW |
| **Punctuation (Pananda)** | PM |
| Period | PMP |
| Exclamation Point | PME |
| Question Mark | PMQ |
| Comma | PMC |
| Semi-colon | PMSC |
| **Symbols** | PMS |

"""
|
621 |
+
|
622 |
|
623 |
tagger = gr.Interface(
|
624 |
predict_tags,
|
625 |
gr.Textbox(placeholder="Enter sentence here..."),
|
626 |
+
gr.Markdown(get_readme()),
|
627 |
["highlight"],
|
628 |
title="BERT Filipino Part of Speech Tagger",
|
629 |
description="Enter a text in Tagalog to classify the tags for each word. Each word to tag needs to be space separated.",
|