bglearning committed on
Commit
b60285f
·
1 Parent(s): 916e1eb

Add streamlit app

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+
3
+ import pandas as pd
4
+ import streamlit as st
5
+
6
+ from transformers import AutoTokenizer
7
+
8
+ from tapas_visualizer import TapasVisualizer
9
+
10
# Page-level settings (browser tab title, icon, wide layout). This is issued
# at import time, ahead of every other Streamlit call in this script.
st.set_page_config(
    page_title="Tapas Tokenizer",
    page_icon="‍🍽️",
    layout="wide",
)
11
+
12
def set_file_input():
    """Record that the file uploader was the most recently changed input.

    Used as the ``on_change`` callback of the file uploader; ``main`` reads
    ``st.session_state.input_stream`` to decide which input to parse.
    """
    st.session_state["input_stream"] = "file"
14
+
15
def set_text_input():
    """Record that the text area was the most recently changed input.

    Used as the ``on_change`` callback of the text area; ``main`` reads
    ``st.session_state.input_stream`` to decide which input to parse.
    """
    st.session_state["input_stream"] = "text"
17
+
18
+
19
def _read_table(text, uploaded_file, input_stream):
    """Parse the user's CSV input into a DataFrame.

    The source named by ``input_stream`` ("file" or "text"; anything else,
    including ``None``, is treated as "text") is preferred. If that source is
    empty, the other one is used instead, so removing an upload or clearing
    the text box does not leave the app trying to parse nothing.

    Args:
        text: Contents of the text area (may be empty or ``None``).
        uploaded_file: File-like object from the uploader, or ``None``.
        input_stream: Which widget changed last, or ``None`` if neither has.

    Returns:
        The parsed ``pd.DataFrame``, or ``None`` when there is no input at all.

    Raises:
        pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError:
            Propagated from ``pd.read_csv`` when the input is not valid CSV.
    """
    has_text = bool(text and text.strip())
    has_file = uploaded_file is not None

    if has_file and (input_stream == "file" or not has_text):
        # Rewind in case the buffer was already consumed by an earlier run.
        uploaded_file.seek(0)
        return pd.read_csv(uploaded_file)
    if has_text:
        return pd.read_csv(io.StringIO(text), sep=",")
    return None


def main():
    """Render the Tapas tokenizer demo page.

    The left column holds the controls (tokenizer choice, CSV text area, file
    uploader, "Tokenize" button); the right column shows the HTML produced by
    ``TapasVisualizer`` for the parsed table. Missing or malformed input is
    reported in the page instead of raising.
    """
    models = [
        "google/tapas-base",
        "deepset/tapas-large-nq-hn-reader",
    ]

    # The model name is an explicit argument so the cache key is visibly tied
    # to the selected model rather than to a closed-over variable.
    # NOTE(review): newer Streamlit releases deprecate st.cache in favour of
    # st.cache_resource -- confirm against the pinned Streamlit version.
    @st.cache()
    def load_tokenizer(model_name):
        return AutoTokenizer.from_pretrained(model_name)

    col1, col2 = st.columns([1, 2])
    with col1:
        selected_model = st.selectbox("Select a tokenizer", models, key=1)
        text = st.text_area(
            label="",
            placeholder="Table to tokenize; csv",
            on_change=set_text_input,
        )
        uploaded_file = st.file_uploader(
            "(Or) Choose a file",
            on_change=set_file_input,
        )
        button_clicked = st.button("Tokenize")

    tokenizer = load_tokenizer(selected_model)
    visualizer = TapasVisualizer(tokenizer)

    with col2:
        if not (text or uploaded_file or button_clicked):
            return

        # input_stream is only present once one of the on_change callbacks
        # has fired; until then the helper defaults to the text area.
        input_stream = None
        if "input_stream" in st.session_state:
            input_stream = st.session_state.input_stream

        try:
            df = _read_table(text, uploaded_file, input_stream)
        except (
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
            UnicodeDecodeError,
        ) as err:
            st.error(f"Could not parse the input as CSV: {err}")
            return

        if df is None:
            st.warning("Enter a CSV table or upload a file to tokenize.")
            return

        st.components.v1.html(visualizer(df.astype(str)), height=1500)
52
+
53
+
54
# Script entry point: build the page when this file is executed directly.
if __name__ == "__main__":
    main()