bglearning committed on
Commit
71b2a17
·
1 Parent(s): c749499

Add cumulative token count

Browse files
Files changed (1) hide show
  1. tapas_visualizer.py +7 -3
tapas_visualizer.py CHANGED
@@ -115,7 +115,7 @@ class TapasVisualizer:
115
  token_text = self.tokenizer._convert_id_to_token(input_id)
116
  cell_tokens[(row_id, col_id)].append(token_text)
117
 
118
- # token_df = pd.DataFrame(token_data, columns=['id', 'token', 'segment_id', 'column_id', 'row_id'])
119
  header_row_html = ""
120
  header_row_token_cnt = 0
121
  for col_id, col in enumerate(table.columns, start=1):
@@ -124,7 +124,9 @@ class TapasVisualizer:
124
  cell_html = "".join(span_htmls)
125
  header_row_html += f"<th>{cell_html}</th>"
126
  header_row_token_cnt += len(cur_cell_tokens)
127
- header_row_html += f'<th style="border: none;">{self.style_span(header_row_token_cnt, ["non-token", "count"])}</th>'
 
 
128
  header_row_html = f'<tr>{header_row_html}</tr>'
129
 
130
  table_vals = table.values
@@ -140,7 +142,9 @@ class TapasVisualizer:
140
  cell_html = "".join(span_htmls)
141
  row_html += f"<td>{cell_html}</td>"
142
  row_token_cnt += len(cur_cell_tokens)
143
- row_html += f'<td style="border: none;">{self.style_span(row_token_cnt, ["non-token", "count"])}</td>'
 
 
144
  table_html += f'<tr>{row_html}</tr>'
145
 
146
  table_html = f'<table>{table_html}</table>'
 
115
  token_text = self.tokenizer._convert_id_to_token(input_id)
116
  cell_tokens[(row_id, col_id)].append(token_text)
117
 
118
+ cumulative_cnt = 0
119
  header_row_html = ""
120
  header_row_token_cnt = 0
121
  for col_id, col in enumerate(table.columns, start=1):
 
124
  cell_html = "".join(span_htmls)
125
  header_row_html += f"<th>{cell_html}</th>"
126
  header_row_token_cnt += len(cur_cell_tokens)
127
+ cumulative_cnt += header_row_token_cnt
128
+ cnt_str = f'{header_row_token_cnt} | {cumulative_cnt}'
129
+ header_row_html += f'<th style="border: none;">{self.style_span(cnt_str, ["non-token", "count"])}</th>'
130
  header_row_html = f'<tr>{header_row_html}</tr>'
131
 
132
  table_vals = table.values
 
142
  cell_html = "".join(span_htmls)
143
  row_html += f"<td>{cell_html}</td>"
144
  row_token_cnt += len(cur_cell_tokens)
145
+ cumulative_cnt += row_token_cnt
146
+ cnt_str = f'{row_token_cnt} | {cumulative_cnt}'
147
+ row_html += f'<td style="border: none;">{self.style_span(cnt_str, ["non-token", "count"])}</td>'
148
  table_html += f'<tr>{row_html}</tr>'
149
 
150
  table_html = f'<table>{table_html}</table>'