huynhdoo committed on
Commit
46e1dc5
·
verified ·
1 Parent(s): 5125874

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. __pycache__/mps-api.cpython-310.pyc +0 -0
  2. app.py +5 -4
  3. mps-api.py +37 -7
__pycache__/mps-api.cpython-310.pyc CHANGED
Binary files a/__pycache__/mps-api.cpython-310.pyc and b/__pycache__/mps-api.cpython-310.pyc differ
 
app.py CHANGED
@@ -2,7 +2,8 @@ import gradio as gr
2
  import requests
3
  import pandas as pd
4
 
5
- api_url = 'https://huynhdoo--mps-api-query.modal.run'
 
6
 
7
  origins = {
8
  'Formation' : ['formation.presentation', 'formation.summary'],
@@ -13,14 +14,14 @@ origins = {
13
  'metier.format_court2']
14
  }
15
 
16
- def API(origin='Formation', query='cuisine'):
17
  # Query API
18
  json = dict(
19
  query=query,
20
  origins=origins[origin]
21
  )
22
 
23
- resp = requests.post(url=api_url, json=json)
24
  data = resp.json()
25
 
26
  # Format result
@@ -33,7 +34,7 @@ def API(origin='Formation', query='cuisine'):
33
  return df
34
 
35
  gradio_app = gr.Interface(
36
- fn=API,
37
  inputs=[
38
  gr.Dropdown(list(origins.keys()), label="Origine", info="Choisir un type de donnée à interroger"),
39
  gr.Textbox(label="Recherche", info="Votre recherche")
 
2
  import requests
3
  import pandas as pd
4
 
5
+ def api_url(remote):
6
+ return f"https://huynhdoo--mps-api-{remote}.modal.run"
7
 
8
  origins = {
9
  'Formation' : ['formation.presentation', 'formation.summary'],
 
14
  'metier.format_court2']
15
  }
16
 
17
+ def retrieve(origin='Formation', query='cuisine'):
18
  # Query API
19
  json = dict(
20
  query=query,
21
  origins=origins[origin]
22
  )
23
 
24
+ resp = requests.post(url=api_url('retrieve'), json=json)
25
  data = resp.json()
26
 
27
  # Format result
 
34
  return df
35
 
36
  gradio_app = gr.Interface(
37
+ fn=retrieve,
38
  inputs=[
39
  gr.Dropdown(list(origins.keys()), label="Origine", info="Choisir un type de donnée à interroger"),
40
  gr.Textbox(label="Recherche", info="Votre recherche")
mps-api.py CHANGED
@@ -9,6 +9,7 @@ model_image = (Image.debian_slim(python_version="3.12")
9
  # Utilities
10
  with model_image.imports():
11
  import os
 
12
  __import__("pysqlite3")
13
  sys.modules["sqlite3"] = sys.modules.pop("pysqlite3") # Hotswap SQLlite version
14
 
@@ -42,7 +43,7 @@ class VECTORDB:
42
  print(f"{self.chroma_collection.count()} documents loaded.")
43
 
44
  @method()
45
- def query(self, query, origins):
46
  results = self.chroma_collection.query(
47
  query_texts=[query],
48
  n_results=10,
@@ -54,18 +55,47 @@ class VECTORDB:
54
  distances = results['distances'][0]
55
  return documents, metadatas, distances
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  ###########
58
  # ENDPOINTS
59
  ###########
60
  @app.function(timeout=30*60)
61
  @web_endpoint(method="POST")
62
- def query(query: Dict):
63
  # Log query
64
- print(f"Incoming query: {query}...")
65
 
66
- # Instantiate vectordb
67
- vectordb = VECTORDB()
68
 
69
- # Run query
70
- documents, metadatas, distances = vectordb.query.remote(query['query'], query['origins'])
71
  return {"documents" : documents, "metadatas" : metadatas, "distances" : distances}
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Utilities
10
  with model_image.imports():
11
  import os
12
+ import numpy as np
13
  __import__("pysqlite3")
14
  sys.modules["sqlite3"] = sys.modules.pop("pysqlite3") # Hotswap SQLlite version
15
 
 
43
  print(f"{self.chroma_collection.count()} documents loaded.")
44
 
45
  @method()
46
+ def search(self, query, origins):
47
  results = self.chroma_collection.query(
48
  query_texts=[query],
49
  n_results=10,
 
55
  distances = results['distances'][0]
56
  return documents, metadatas, distances
57
 
58
+ @app.cls(timeout=30*60)
59
+ class RANKING:
60
+ @enter()
61
+ @build()
62
+ def init(self):
63
+ # Load crossencoder
64
+ from sentence_transformers import CrossEncoder
65
+ model_name = "Lajavaness/CrossEncoder-camembert-large"
66
+ self.cross_encoder = CrossEncoder(model_name)
67
+ print(f"Cross encoder model loaded: {model_name}")
68
+
69
+ @method()
70
+ def rank(self, query, documents):
71
+ pairs = [[query, doc] for doc in documents]
72
+ print(pairs)
73
+ scores = self.cross_encoder.predict(pairs)
74
+ print(scores)
75
+ ranking = np.argsort(scores)[::]
76
+ return ranking
77
+
78
  ###########
79
  # ENDPOINTS
80
  ###########
81
  @app.function(timeout=30*60)
82
  @web_endpoint(method="POST")
83
+ def retrieve(query: Dict):
84
  # Log query
85
+ print(f"Retrieve query: {query}...")
86
 
87
+ # Searching documents
88
+ documents, metadatas, distances = VECTORDB().search.remote(query['query'], query['origins'])
89
 
 
 
90
  return {"documents" : documents, "metadatas" : metadatas, "distances" : distances}
91
+
92
+ @app.function(timeout=30*60)
93
+ @web_endpoint(method="POST")
94
+ def rank(query: Dict):
95
+ # Log query
96
+ print(f"Rank query: {query}...")
97
+
98
+ # Ranking documents
99
+ ranking = RANKING().rank.remote(query['query'], query['documents'])
100
+
101
+ return {"ranking" : ranking}