jaothan committed on
Commit
0f2dc21
·
verified ·
1 Parent(s): a5958fb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +201 -0
app.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import pandas as pd
4
+ import argparse
5
+
6
def make_default_md():
    """Return the static Markdown introduction shown above the leaderboard.

    The text describes the project goal and links to the two benchmark
    datasets. Nothing is interpolated, so a plain string literal is used
    rather than an f-string.

    Returns:
        The Markdown source as a string, starting and ending with a newline.
    """
    return """
# 🏆 LLms Benchmark

The main goal of this project is to utilize Large Language Models (LLMs) to extract specific information from PDF documents and organize it into a structured JSON format.

To achieve this objective, we are assessing various LLMs on two benchmarks:

1. [Benchmark1](https://huggingface.co/spaces/Nechba/LLms-Benchmark/blob/main/dataset.jsonl):
This benchmark consists of a dataset of 59 pages as context and corresponding JSON extracts from "Interchange and Service Fees Manual: Europe Region".

2. [Benchmark2](https://huggingface.co/datasets/Effyis/Table-Extraction):
This benchmark comprises a dataset of 16573 tables as context and corresponding JSON extracts.
"""
21
+
22
+
23
def make_arena_leaderboard_md(total_models, last_updated="Juin 01, 2024"):
    """Return the Markdown summary line shown above a leaderboard table.

    Args:
        total_models: Number of models listed in the table; rendered in bold.
        last_updated: Date text rendered after "Last updated:". The default
            reproduces the previously hard-coded value byte-for-byte, so
            existing one-argument callers see unchanged output.

    Returns:
        The Markdown source as a string: a leading newline, the summary
        line, then a blank line.
    """
    return f"""
Total #models: **{total_models}**. Last updated: {last_updated}.

"""
29
+
30
def model_hyperlink(model_name, link):
    """Return an HTML anchor labelled *model_name* that opens *link* in a new tab.

    Both arguments are inserted into the markup verbatim; no HTML escaping
    is applied.
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    template = '<a target="_blank" href="{}" style="{}">{}</a>'
    return template.format(link, anchor_style, model_name)
32
+
33
def load_leaderboard_table_csv(filename, add_hyperlink=True):
    """Load a leaderboard CSV file into a list of row dictionaries.

    The first record is the header; every following record becomes a dict
    mapping header name to the whitespace-stripped cell text. Cells beyond
    the header length (or headers beyond the row length) are dropped, as
    ``zip`` stops at the shorter sequence.

    Args:
        filename: Path of the CSV file to read.
        add_hyperlink: When true, replace ``"Model"`` with an HTML anchor
            pointing at the row's ``"Link"`` value, and ``"Notebook link"``
            with an anchor labelled "Notebook" pointing at its own value
            (both built with ``model_hyperlink``).

    Returns:
        A list of dicts, one per non-blank data row. An empty file yields
        an empty list.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If ``add_hyperlink`` is true and a data row lacks the
            ``"Model"``, ``"Link"`` or ``"Notebook link"`` column.
    """
    # Function-scope import so this block does not depend on the file header.
    import csv

    rows = []
    # utf-8-sig accepts files with or without a BOM; newline="" is what the
    # csv module expects so it can handle embedded line breaks itself.
    with open(filename, "r", encoding="utf-8-sig", newline="") as file:
        # csv.reader honours quoted fields (e.g. a name containing a comma),
        # which a plain split(",") would cut apart.
        reader = csv.reader(file, skipinitialspace=True)
        heads = [v.strip() for v in next(reader, [])]
        for record in reader:
            values = [v.strip() for v in record]
            if not any(values):
                # Blank line (or a row of empty cells): nothing to show.
                continue
            item = dict(zip(heads, values))
            if add_hyperlink:
                item["Model"] = model_hyperlink(item["Model"], item["Link"])
                item["Notebook link"] = model_hyperlink("Notebook", item["Notebook link"])
            rows.append(item)
    return rows
48
+
49
def get_arena_table(model_table_df):
    """Convert the leaderboard DataFrame into display rows, best model first.

    The three metric columns are converted to ``float`` and the rows are
    sorted by "Percentage of values" in descending order. The conversion is
    done on a copy, so the caller's DataFrame is left untouched.

    Args:
        model_table_df: DataFrame with at least the columns "Model",
            "Percentage of values", "Percentage of keys",
            "Average time (s)", "Notebook link" and "License".

    Returns:
        A list of rows, each a list
        ``[model, pct_values, pct_keys, avg_time, notebook_link, license]``.
        An empty DataFrame (including one with no columns) yields ``[]``.

    Raises:
        KeyError: If a non-empty DataFrame lacks one of the required columns.
        ValueError: If a metric cell cannot be converted to ``float``.
    """
    # Checked first so that a column-less empty frame does not raise KeyError.
    if model_table_df.empty:
        return []

    numeric_columns = ["Percentage of values", "Percentage of keys", "Average time (s)"]
    display_columns = ["Model", *numeric_columns, "Notebook link", "License"]

    # DataFrame.astype returns a new frame, so the input is not mutated.
    arena_df = model_table_df.astype(dict.fromkeys(numeric_columns, float)).sort_values(
        by=["Percentage of values"], ascending=False
    )
    return arena_df[display_columns].to_numpy().tolist()
69
+
70
+
71
+
72
def _build_benchmark_tab(tab_label, tab_id, leaderboard_table_file):
    """Render one benchmark tab: a model-count line followed by the results table."""
    model_table_df = pd.DataFrame(load_leaderboard_table_csv(leaderboard_table_file))
    with gr.Tab(tab_label, id=tab_id):
        arena_table_vals = get_arena_table(model_table_df)
        gr.Markdown(
            make_arena_leaderboard_md(len(arena_table_vals)),
            elem_id="leaderboard_markdown",
        )
        gr.Dataframe(
            headers=[
                "Model",
                "Percentage of values (%)",
                "Percentage of keys (%)",
                "Average time (s)",
                "Code",
                "License",
            ],
            # "markdown" lets the HTML anchors in the Model/Code cells render as links.
            datatype=[
                "markdown",
                "number",
                "number",
                "number",
                "markdown",
                "str",
            ],
            value=arena_table_vals,
            elem_id="arena_leaderboard_dataframe",
            column_widths=[200, 150, 150, 130, 100, 140],
            wrap=True,
        )


def build_leaderboard_tab(leaderboard_table_file1, leaderboard_table_file2, show_plot=False):
    """Build the leaderboard UI: intro Markdown plus one tab per benchmark CSV.

    Must be called inside an active ``gr.Blocks`` context, since the
    components are created for their side effect of being added to it.

    Args:
        leaderboard_table_file1: Path of the Benchmark 1 leaderboard CSV,
            or a falsy value to omit that tab.
        leaderboard_table_file2: Path of the Benchmark 2 leaderboard CSV,
            or a falsy value to omit that tab.
        show_plot: Accepted for call compatibility; no plots are built, so
            the value is currently ignored.

    Returns:
        A one-element list holding the intro ``gr.Markdown`` component. The
        previous code also listed ``plot_1`` and ``plot_2``, which were never
        defined and made every call fail with ``NameError``.
    """
    default_md = make_default_md()
    md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")

    # (tab id, tab label, CSV path); tabs without a path are skipped so a
    # missing file argument no longer crashes inside open().
    benchmark_tabs = [
        (0, " 🏅 Benchmark 1", leaderboard_table_file1),
        (1, "🏅 Benchmark 2", leaderboard_table_file2),
    ]
    available_tabs = [spec for spec in benchmark_tabs if spec[2]]

    if available_tabs:
        with gr.Tabs():
            for tab_id, tab_label, table_file in available_tabs:
                _build_benchmark_tab(tab_label, tab_id, table_file)

    return [md_1]
146
+
147
# Custom CSS handed to gr.Blocks(css=block_css) in build_demo.
# - #leaderboard_markdown matches the elem_id given to the Markdown components.
# - "footer { display:none }" hides the page footer.
# NOTE(review): #leaderboard_dataframe does not match the elem_id
# "arena_leaderboard_dataframe" used for the tables in this file, so that rule
# presumably never applies -- confirm whether the selector or the elem_id is
# the intended name before changing either.
# NOTE(review): #notice_markdown and .sponsor-image-about are not assigned to
# any component in the code visible here -- possibly leftovers; verify.
block_css = """
#notice_markdown {
font-size: 104%
}
#notice_markdown th {
display: none;
}
#notice_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_markdown {
font-size: 104%
}
#leaderboard_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_dataframe td {
line-height: 0.1em;
}
footer {
display:none !important
}
.sponsor-image-about img {
margin: 0 20px;
margin-top: 20px;
height: 40px;
max-height: 100%;
width: auto;
float: left;
}
"""
180
+
181
def build_demo(leaderboard_table_file1, leaderboard_table_file2):
    """Assemble the Gradio Blocks app that hosts the leaderboard.

    Args:
        leaderboard_table_file1: CSV path forwarded to build_leaderboard_tab
            as the first benchmark file.
        leaderboard_table_file2: CSV path forwarded to build_leaderboard_tab
            as the second benchmark file.

    Returns:
        The ``gr.Blocks`` instance, ready to be launched.
    """
    # Base theme with the large text preset, styled further by block_css.
    base_theme = gr.themes.Base(text_size=gr.themes.sizes.text_lg)
    with gr.Blocks(title="LLMS Benchmark", theme=base_theme, css=block_css) as demo:
        # Called for its side effect: components attach to the open Blocks context.
        build_leaderboard_tab(
            leaderboard_table_file1,
            leaderboard_table_file2,
            show_plot=True,
        )
    return demo
192
+
193
if __name__ == "__main__":
    # The only command-line switch: --share is forwarded to demo.launch().
    cli = argparse.ArgumentParser()
    cli.add_argument("--share", action="store_true")
    options = cli.parse_args()

    # One leaderboard CSV per benchmark, relative to the working directory.
    benchmark_csv_paths = (
        "./Benchmark1/leaderboard.csv",
        "./Benchmark2/leaderboard.csv",
    )
    demo = build_demo(*benchmark_csv_paths)
    demo.launch(share=options.share)