import io
import pandas as pd
import streamlit as st
from transformers import AutoTokenizer
from tapas_visualizer import TapasVisualizer
st.set_page_config(page_title="Tapas Tokenizer", page_icon='🍽️', layout="wide")

def set_file_input():
    st.session_state.input_stream = "file"


def set_text_input():
    st.session_state.input_stream = "text"

def main():
    models = [
        "google/tapas-base",
        "deepset/tapas-large-nq-hn-reader",
    ]

    @st.cache()
    def load_tokenizer(model_name: str):
        # Load the tokenizer once per selected model; st.cache keys on the argument.
        return AutoTokenizer.from_pretrained(model_name)

    col1, col2 = st.columns([1, 2])

    with col1:
        selected_model = st.selectbox("Select a tokenizer", models, key=1)
        text = st.text_area(
            label="",
            placeholder="Table to tokenize; csv",
            on_change=set_text_input,
        )
        uploaded_file = st.file_uploader("(Or) Choose a file", on_change=set_file_input)
        button_clicked = st.button("Tokenize")

    tokenizer = load_tokenizer(selected_model)
    visualizer = TapasVisualizer(tokenizer)

    with col2:
        if text or uploaded_file or button_clicked:
            df = None
            # The most recently edited widget (text area vs. file upload) decides the input source.
            if "input_stream" not in st.session_state or st.session_state.input_stream == "text":
                df = pd.read_csv(io.StringIO(text), sep=",")
            elif st.session_state.input_stream == "file":
                df = pd.read_csv(uploaded_file)

            if df is not None:
                # Render the visualizer's HTML output inside the Streamlit page.
                st.components.v1.html(visualizer(df.astype(str)), height=1500)


if __name__ == "__main__":
    main()
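For reference, a minimal sketch of what the tokenizer loaded above does with a table, assuming the Hugging Face transformers TAPAS tokenizer API; the sample table and query are invented for illustration. It also shows why the app calls df.astype(str) before visualizing: TAPAS tokenizers expect every cell value to be a string.

import pandas as pd
from transformers import AutoTokenizer

# Illustrative table and query, not taken from the app.
table = pd.DataFrame(
    {"City": ["Paris", "Tokyo"], "Population": ["2.1M", "13.9M"]}
).astype(str)  # TAPAS tokenizers require string cell values

tokenizer = AutoTokenizer.from_pretrained("google/tapas-base")
encoding = tokenizer(table=table, queries=["Which city has more people?"], return_tensors="pt")
print(encoding["input_ids"].shape)  # (1, sequence_length)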