Spaces:
				
			
			
	
			
			
					
		Running
		
			on 
			
			CPU Upgrade
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
			on 
			
			CPU Upgrade
	Commit 
							
							·
						
						0d0563c
	
1
								Parent(s):
							
							cf7ddc6
								
update
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -331,6 +331,19 @@ TASK_TO_METRIC = { 
     | 
|
| 331 | 
         
             
                "InstructionRetrieval": "p-MRR",
         
     | 
| 332 | 
         
             
            }
         
     | 
| 333 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 334 | 
         
             
            def make_clickable_model(model_name, link=None):
         
     | 
| 335 | 
         
             
                if link is None:
         
     | 
| 336 | 
         
             
                    link = "https://huggingface.co/" + model_name
         
     | 
| 
         @@ -1170,6 +1183,15 @@ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = { 
     | 
|
| 1170 | 
         
             
                for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
         
     | 
| 1171 | 
         
             
            }
         
     | 
| 1172 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1173 | 
         
             
            MODELS_TO_SKIP = {
         
     | 
| 1174 | 
         
             
                "baseplate/instructor-large-1", # Duplicate
         
     | 
| 1175 | 
         
             
                "radames/e5-large", # Duplicate
         
     | 
| 
         @@ -1493,7 +1515,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_ 
     | 
|
| 1493 | 
         
             
                df = pd.DataFrame(df_list)
         
     | 
| 1494 | 
         
             
                # If there are any models that are the same, merge them
         
     | 
| 1495 | 
         
             
                # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
         
     | 
| 1496 | 
         
            -
                df = df.groupby("Model", as_index=False).first()
         
     | 
| 1497 | 
         
             
                # Put 'Model' column first
         
     | 
| 1498 | 
         
             
                cols = sorted(list(df.columns))
         
     | 
| 1499 | 
         
             
                cols.insert(0, cols.pop(cols.index("Model")))
         
     | 
| 
         @@ -1502,6 +1524,9 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_ 
     | 
|
| 1502 | 
         
             
                    df = add_rank(df)       
         
     | 
| 1503 | 
         
             
                if fillna:
         
     | 
| 1504 | 
         
             
                    df.fillna("", inplace=True)
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1505 | 
         
             
                return df
         
     | 
| 1506 | 
         | 
| 1507 | 
         
             
            def get_mteb_average():
         
     | 
| 
         @@ -2196,7 +2221,7 @@ function(goalUrlObject) { 
     | 
|
| 2196 | 
         
             
            def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
         
     | 
| 2197 | 
         
             
                current_task_language["task"] = event.target.id
         
     | 
| 2198 | 
         
             
                # Either use the cached language for this task or the 1st language
         
     | 
| 2199 | 
         
            -
                current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[0].children[0].id)
     | 
| 2200 | 
         
             
                return current_task_language, language_per_task
         
     | 
| 2201 | 
         | 
| 2202 | 
         
             
            def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
         
     | 
| 
         @@ -2300,7 +2325,7 @@ with gr.Blocks(css=css) as block: 
     | 
|
| 2300 | 
         
             
                        with gr.Tab(task, id=task_tab_id) as task_tab:
         
     | 
| 2301 | 
         
             
                            # For updating the 'task' in the URL
         
     | 
| 2302 | 
         
             
                            task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
         
     | 
| 2303 | 
         
            -
             
     | 
| 2304 | 
         
             
                            with gr.Tabs() as task_tabs:
         
     | 
| 2305 | 
         
             
                                # Store the task tabs for updating them on load based on URL parameters
         
     | 
| 2306 | 
         
             
                                tabs.append(task_tabs)
         
     | 
| 
         | 
|
| 331 | 
         
             
                "InstructionRetrieval": "p-MRR",
         
     | 
| 332 | 
         
             
            }
         
     | 
| 333 | 
         | 
| 334 | 
         
            +
            TASK_DESCRIPTION = {
         
     | 
| 335 | 
         
            +
                "Bitext Mining": "Bitext mining is the task of finding parallel sentences in two languages.",
         
     | 
| 336 | 
         
            +
                "Clustering": "Clustering is the task of grouping similar documents together.",
         
     | 
| 337 | 
         
            +
                "Classification": "Classification is the task of assigning a label to a text.",
         
     | 
| 338 | 
         
            +
                "Pair Classification": "Pair classification is the task of determining whether two texts are similar.",
         
     | 
| 339 | 
         
            +
                "Reranking": "Reranking is the task of reordering a list of documents to improve relevance.",
         
     | 
| 340 | 
         
            +
                "Retrieval": "Retrieval is the task of finding relevant documents for a query.",
         
     | 
| 341 | 
         
            +
                "STS": "Semantic Textual Similarity is the task of determining how similar two texts are.",
         
     | 
| 342 | 
         
            +
                "Summarization": "Summarization is the task of generating a summary of a text.",
         
     | 
| 343 | 
         
            +
                "Retrieval w/Instructions": "Retrieval w/Instructions is the task of finding relevant documents for a query that has detailed instructions.",
         
     | 
| 344 | 
         
            +
                "Overall": "Overall performance across MTEB tasks.",
         
     | 
| 345 | 
         
            +
            }
         
     | 
| 346 | 
         
            +
             
     | 
| 347 | 
         
             
            def make_clickable_model(model_name, link=None):
         
     | 
| 348 | 
         
             
                if link is None:
         
     | 
| 349 | 
         
             
                    link = "https://huggingface.co/" + model_name
         
     | 
| 
         | 
|
| 1183 | 
         
             
                for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
         
     | 
| 1184 | 
         
             
            }
         
     | 
| 1185 | 
         | 
| 1186 | 
         
            +
            CROSS_ENCODERS = {
         
     | 
| 1187 | 
         
            +
                "FollowIR-7B",
         
     | 
| 1188 | 
         
            +
                "flan-t5-base",
         
     | 
| 1189 | 
         
            +
                "flan-t5-large",
         
     | 
| 1190 | 
         
            +
                "monobert-large-msmarco",
         
     | 
| 1191 | 
         
            +
                "monot5-3b-msmarco-10k",
         
     | 
| 1192 | 
         
            +
                "monot5-base-msmarco-10k",
         
     | 
| 1193 | 
         
            +
            }
         
     | 
| 1194 | 
         
            +
             
     | 
| 1195 | 
         
             
            MODELS_TO_SKIP = {
         
     | 
| 1196 | 
         
             
                "baseplate/instructor-large-1", # Duplicate
         
     | 
| 1197 | 
         
             
                "radames/e5-large", # Duplicate
         
     | 
| 
         | 
|
| 1515 | 
         
             
                df = pd.DataFrame(df_list)
         
     | 
| 1516 | 
         
             
                # If there are any models that are the same, merge them
         
     | 
| 1517 | 
         
             
                # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
         
     | 
| 1518 | 
         
            +
                df = df.groupby("Model", as_index=False).first() 
         
     | 
| 1519 | 
         
             
                # Put 'Model' column first
         
     | 
| 1520 | 
         
             
                cols = sorted(list(df.columns))
         
     | 
| 1521 | 
         
             
                cols.insert(0, cols.pop(cols.index("Model")))
         
     | 
| 
         | 
|
| 1524 | 
         
             
                    df = add_rank(df)       
         
     | 
| 1525 | 
         
             
                if fillna:
         
     | 
| 1526 | 
         
             
                    df.fillna("", inplace=True)
         
     | 
| 1527 | 
         
            +
             
     | 
| 1528 | 
         
            +
                if "instruction" in task.lower():
         
     | 
| 1529 | 
         
            +
                    df["Model"] = df.Model.apply(lambda x: "❎" + x if x.split(">")[1].split("<")[0] in CROSS_ENCODERS else x)
         
     | 
| 1530 | 
         
             
                return df
         
     | 
| 1531 | 
         | 
| 1532 | 
         
             
            def get_mteb_average():
         
     | 
| 
         | 
|
| 2221 | 
         
             
            def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
         
     | 
| 2222 | 
         
             
                current_task_language["task"] = event.target.id
         
     | 
| 2223 | 
         
             
                # Either use the cached language for this task or the 1st language
         
     | 
| 2224 | 
         
            +
                current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[1].children[0].id)
         
     | 
| 2225 | 
         
             
                return current_task_language, language_per_task
         
     | 
| 2226 | 
         | 
| 2227 | 
         
             
            def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
         
     | 
| 
         | 
|
| 2325 | 
         
             
                        with gr.Tab(task, id=task_tab_id) as task_tab:
         
     | 
| 2326 | 
         
             
                            # For updating the 'task' in the URL
         
     | 
| 2327 | 
         
             
                            task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
         
     | 
| 2328 | 
         
            +
                            gr.Markdown(TASK_DESCRIPTION[task])
         
     | 
| 2329 | 
         
             
                            with gr.Tabs() as task_tabs:
         
     | 
| 2330 | 
         
             
                                # Store the task tabs for updating them on load based on URL parameters
         
     | 
| 2331 | 
         
             
                                tabs.append(task_tabs)
         
     |