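"""Streamlit demo: visualize how a TAPAS tokenizer tokenizes a CSV table."""
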
import io

import pandas as pd
import streamlit as st
import streamlit.components.v1 as components

from transformers import AutoTokenizer

from tapas_visualizer import TapasVisualizer

st.set_page_config(page_title="Tapas Tokenizer", page_icon="🍽️", layout="wide")

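# Widget callbacks: remember which input ("text" area or uploaded "file")
# changed last, so the tokenize step reads the table from the right source.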
def set_file_input():
    st.session_state.input_stream = "file"

def set_text_input():
    st.session_state.input_stream = "text"


def main():

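    # TAPAS checkpoints on the Hugging Face Hub the user can choose between.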
    models = [
        "google/tapas-base",
        "deepset/tapas-large-nq-hn-reader"
    ]

    @st.cache(allow_output_mutation=True)
    def load_tokenizer(model_name: str):
        # Cache per model name so switching models reloads the tokenizer,
        # while ordinary script reruns reuse the cached one.
        return AutoTokenizer.from_pretrained(model_name)

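    # Two-column layout: inputs on the left, rendered tokenization on the right.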
    col1, col2 = st.columns([1, 2])
    with col1:
        selected_model = st.selectbox("Select a tokenizer", models, key=1)
        text = st.text_area(label="Table to tokenize (CSV)", on_change=set_text_input)
        uploaded_file = st.file_uploader("(Or) Choose a CSV file", on_change=set_file_input)
        button_clicked = st.button("Tokenize")

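    # Build the visualizer from the cached tokenizer for the selected model.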
    tokenizer = load_tokenizer(selected_model)
    visualizer = TapasVisualizer(tokenizer)

    with col2:
        if text or uploaded_file or button_clicked:
            df: pd.DataFrame | None = None

            # Default to the text area unless the file uploader was touched last.
            if "input_stream" not in st.session_state or st.session_state.input_stream == "text":
                if text:
                    df = pd.read_csv(io.StringIO(text), sep=",")
            elif st.session_state.input_stream == "file" and uploaded_file is not None:
                df = pd.read_csv(uploaded_file)

            if df is not None:
                # TapasVisualizer renders HTML; TAPAS expects string-typed cells.
                components.html(visualizer(df.astype(str)), height=1500)


if __name__ == '__main__':
    main()