Spaces:
Runtime error
Runtime error
Commit
·
588a02c
1
Parent(s):
32fbbc2
Add/update docstring
Browse files
- README.md +1 -1
- tapas_visualizer.py +14 -6
README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
---
|
| 2 |
title: Tapas Tokenizer Viz
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: streamlit
|
|
|
|
| 1 |
---
|
| 2 |
title: Tapas Tokenizer Viz
|
| 3 |
+
emoji: 🍽️
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: streamlit
|
tapas_visualizer.py
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
from typing import Any, List, Dict
|
| 3 |
|
|
@@ -5,22 +10,24 @@ from collections import defaultdict
|
|
| 5 |
|
| 6 |
import pandas as pd
|
| 7 |
|
|
|
|
|
|
|
| 8 |
dirname = os.path.dirname(__file__)
|
| 9 |
css_filename = os.path.join(dirname, "tapas-styles.css")
|
| 10 |
with open(css_filename) as f:
|
| 11 |
css = f.read()
|
| 12 |
|
| 13 |
|
| 14 |
-
def HTMLBody(table_html: str, css_styles=css) -> str:
|
| 15 |
"""
|
| 16 |
Generates the full html with css from a list of html spans
|
| 17 |
|
| 18 |
Args:
|
| 19 |
-
|
| 20 |
-
|
| 21 |
|
| 22 |
-
css_styles (
|
| 23 |
-
|
| 24 |
|
| 25 |
Returns:
|
| 26 |
:obj:`str`: An HTML string with style markup
|
|
@@ -42,10 +49,11 @@ def HTMLBody(table_html: str, css_styles=css) -> str:
|
|
| 42 |
|
| 43 |
|
| 44 |
class TapasVisualizer:
|
| 45 |
-
def __init__(self, tokenizer) -> None:
|
| 46 |
self.tokenizer = tokenizer
|
| 47 |
|
| 48 |
def normalize_token_str(self, token_str: str) -> str:
|
|
|
|
| 49 |
return token_str.replace("##", "")
|
| 50 |
|
| 51 |
def style_span(self, span_text: str, css_classes: List[str]) -> str:
|
|
|
|
| 1 |
+
"""Visualizer for TAPAS
|
| 2 |
+
|
| 3 |
+
Implementation heavily based on
|
| 4 |
+
`EncodingVisualizer` from `tokenizers.tools`.
|
| 5 |
+
"""
|
| 6 |
import os
|
| 7 |
from typing import Any, List, Dict
|
| 8 |
|
|
|
|
| 10 |
|
| 11 |
import pandas as pd
|
| 12 |
|
| 13 |
+
from transformers import TapasTokenizer
|
| 14 |
+
|
| 15 |
dirname = os.path.dirname(__file__)
|
| 16 |
css_filename = os.path.join(dirname, "tapas-styles.css")
|
| 17 |
with open(css_filename) as f:
|
| 18 |
css = f.read()
|
| 19 |
|
| 20 |
|
| 21 |
+
def HTMLBody(table_html: str, css_styles: str = css) -> str:
|
| 22 |
"""
|
| 23 |
Generates the full html with css from a list of html spans
|
| 24 |
|
| 25 |
Args:
|
| 26 |
+
table_html (str):
|
| 27 |
+
The html string of the table
|
| 28 |
|
| 29 |
+
css_styles (str):
|
| 30 |
+
CSS styling to be embedded inline
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
:obj:`str`: An HTML string with style markup
|
|
|
|
| 49 |
|
| 50 |
|
| 51 |
class TapasVisualizer:
|
| 52 |
+
def __init__(self, tokenizer: TapasTokenizer) -> None:
|
| 53 |
self.tokenizer = tokenizer
|
| 54 |
|
| 55 |
def normalize_token_str(self, token_str: str) -> str:
|
| 56 |
+
# Normalize subword tokens to org subword str
|
| 57 |
return token_str.replace("##", "")
|
| 58 |
|
| 59 |
def style_span(self, span_text: str, css_classes: List[str]) -> str:
|