Spaces:
Running
Running
Carlo Moro
committed on
Commit
·
3d745f2
1
Parent(s):
72bec54
Implement app.py for reranking and deduplication, and add requirements for strive-ranker library
Browse files
- app.py +28 -0
- requirements.txt +1 -0
app.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from strive.reranker import Reranker, EmbeddingType, deduplicate_results
|
| 2 |
+
import gradio as gr
|
| 3 |
+
|
| 4 |
+
def rerank_and_deduplicate(query: str, corpus_text: str) -> str:
    """Rerank a newline-separated corpus against a query and format the results.

    The corpus is split on newlines, each line is stripped, and blank lines
    are dropped. The remaining documents are ranked twice, once with a
    textual-embedding reranker and once with a semantic-embedding reranker.
    The two result lists are concatenated and passed to
    ``deduplicate_results`` (capped at 50 entries).

    Args:
        query: The search query forwarded to both rerankers.
        corpus_text: Candidate documents, one per line. ``None`` or a string
            with no non-blank lines is treated as an empty corpus.

    Returns:
        One line per result in the form ``"<text> (Score: <score>)"`` with
        the score shown to four decimal places, or an empty string when the
        corpus contains no documents.
    """
    corpus = [
        line.strip()
        for line in (corpus_text or "").split("\n")
        if line.strip()
    ]
    if not corpus:
        # Nothing to rank: skip constructing the rerankers and avoid calling
        # rerank_documents with an empty document list and top_k=0.
        return ""

    textual_reranker = Reranker(embedding_type=EmbeddingType.textual)
    semantic_reranker = Reranker(embedding_type=EmbeddingType.semantic)

    # Ask each reranker for a score for every document (top_k == corpus size)
    # so both complete rankings are available for merging.
    textual_results = textual_reranker.rerank_documents(
        query, corpus, top_k=len(corpus)
    )
    semantic_results = semantic_reranker.rerank_documents(
        query, corpus, top_k=len(corpus)
    )

    merged_results = textual_results + semantic_results
    deduplicated_results = deduplicate_results(merged_results, top_k=50)

    # The unpacking below requires each result to be a (text, score) pair.
    return "\n".join(
        f"{text} (Score: {score:.4f})" for text, score in deduplicated_results
    )
|
| 16 |
+
|
| 17 |
+
# Gradio UI: a query box and a multi-line corpus box feed
# rerank_and_deduplicate, whose formatted output is shown in a single textbox.
app = gr.Interface(
    fn=rerank_and_deduplicate,
    inputs=[
        gr.Textbox(label="Query", placeholder="Enter your query here"),
        gr.Textbox(label="Corpus", placeholder="Enter one sentence per line", lines=10),
    ],
    outputs=gr.Textbox(label="Top Ranked Results"),
    title="STRIVE: Semantic Tokenized Ranking via Vectorization & Embeddings",
    description="Enter a query and multiple sentences to test the reranking algorithm.",
)

if __name__ == "__main__":
    # Start the web server only when executed as a script, so that importing
    # this module (e.g. for testing) does not launch the app as a side effect.
    app.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
strive-ranker
|