Spaces:

librarian-bots
/

huggingface-semantic-search

Running

App Files Community

davanstrien HF Staff committed on Aug 9, 2023

Commit

84bfe38

1 Parent(s): 85ef5ed

add results number slider

Browse files

Files changed (1) hide show

app.py +34 -16

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
-import gradio as gr
-from qdrant_client import QdrantClient
-from qdrant_client import models
-from sentence_transformers import SentenceTransformer
-from dotenv import load_dotenv
 import os
 from functools import lru_cache
 load_dotenv()
@@ -22,25 +23,31 @@ client = QdrantClient(
 def format_results(results):
-    markdown = ""
     for result in results:
         hub_id = result.payload["id"]
         url = f"https://huggingface.co/datasets/{hub_id}"
         header = f"## [{hub_id}]({url})"
         markdown += header + "\n"
-        markdown += result.payload["section_text"] + "\n"
     return markdown
 @lru_cache(maxsize=100_000)
-def search(query: str):
     query_ = sentence_embedding_model.encode(
         f"Represent this sentence for searching relevant passages:{query}"
     )
     results = client.search(
         collection_name="dataset_cards",
         query_vector=query_,
-        limit=10,
     )
     return format_results(results)
@@ -68,17 +75,19 @@ def hub_id_qdrant_id(hub_id):
 @lru_cache()
-def recommend(hub_id):
     positive_id = hub_id_qdrant_id(hub_id)
-    results = client.recommend(collection_name=collection_name, positive=[positive_id])
     return format_results(results)
-def query(search_term, search_type):
     if search_type == "Recommend similar datasets":
-        return recommend(search_term)
     else:
-        return search(search_term)
 with gr.Blocks() as demo:
@@ -94,6 +103,7 @@ with gr.Blocks() as demo:
             value="movie review sentiment",
             label="hub id i.e. IMDB or query i.e. movie review sentiment",
         )
     with gr.Row():
         with gr.Row():
             find_similar_btn = gr.Button("Search")
@@ -103,9 +113,17 @@ with gr.Blocks() as demo:
                 value="Semantic Search",
                 interactive=True,
             )
     results = gr.Markdown()
-    find_similar_btn.click(query, [search_term, search_type], results)
 demo.launch()

 import os
 from functools import lru_cache
+from typing import Optional
+import gradio as gr
+from dotenv import load_dotenv
+from qdrant_client import QdrantClient, models
+from sentence_transformers import SentenceTransformer
 load_dotenv()
 def format_results(results):
+    markdown = (
+        "<h1 style='text-align: center;'>  &#x2728; Dataset Search Results  &#x2728;"
+        " </h1> \n\n"
+    )
     for result in results:
         hub_id = result.payload["id"]
+        download_number = result.payload["downloads"]
         url = f"https://huggingface.co/datasets/{hub_id}"
         header = f"## [{hub_id}]({url})"
         markdown += header + "\n"
+        markdown += f"**Downloads:** {download_number}\n\n"
+        markdown += f"{result.payload['section_text']} \n"
     return markdown
 @lru_cache(maxsize=100_000)
+def search(query: str, limit: Optional[int] = 10):
     query_ = sentence_embedding_model.encode(
         f"Represent this sentence for searching relevant passages:{query}"
     )
     results = client.search(
         collection_name="dataset_cards",
         query_vector=query_,
+        limit=limit,
     )
     return format_results(results)
 @lru_cache()
+def recommend(hub_id, limit: Optional[int] = 10):
     positive_id = hub_id_qdrant_id(hub_id)
+    results = client.recommend(
+        collection_name=collection_name, positive=[positive_id], limit=limit
+    )
     return format_results(results)
+def query(search_term, search_type, limit: Optional[int] = 10):
     if search_type == "Recommend similar datasets":
+        return recommend(search_term, limit)
     else:
+        return search(search_term, limit)
 with gr.Blocks() as demo:
             value="movie review sentiment",
             label="hub id i.e. IMDB or query i.e. movie review sentiment",
         )
     with gr.Row():
         with gr.Row():
             find_similar_btn = gr.Button("Search")
                 value="Semantic Search",
                 interactive=True,
             )
+        with gr.Column():
+            max_results = gr.Slider(
+                minimum=1,
+                maximum=50,
+                step=1,
+                value=10,
+                label="Maximum number of results",
+                help="This is the maximum number of results that will be returned",
+            )
     results = gr.Markdown()
+    find_similar_btn.click(query, [search_term, search_type, max_results], results)
 demo.launch()