chryzxc committed on
Commit
32854a5
·
verified ·
1 Parent(s): 43d635b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -18
app.py CHANGED
@@ -1,28 +1,22 @@
1
- from transformers import AutoTokenizer
2
  from onnxruntime import InferenceSession
3
  import numpy as np
4
- import json
5
- from fastapi import FastAPI
6
 
7
  app = FastAPI()
8
 
9
- # Initialize components
10
- tokenizer = AutoTokenizer.from_pretrained(
11
- "Xenova/multi-qa-mpnet-base-dot-v1",
12
- use_fast=False # Avoids framework dependencies
13
- )
14
  session = InferenceSession("model.onnx")
15
 
16
- def cosine_similarity(a, b):
17
- return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
18
-
19
  @app.post("/predict")
20
- async def predict(query: str):
21
- # Tokenize
22
- inputs = tokenizer(query, return_tensors="np")
23
- inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
24
 
25
- # Get embedding
26
- embedding = session.run(None, inputs)[0][0]
 
 
 
27
 
28
- return {"embedding": embedding.tolist()}
 
1
+ from fastapi import FastAPI
2
  from onnxruntime import InferenceSession
3
  import numpy as np
 
 
4
 
5
  app = FastAPI()
6
 
7
+ # Load ONNX model only
 
 
 
 
8
  session = InferenceSession("model.onnx")
9
 
 
 
 
10
  @app.post("/predict")
11
+ async def predict(inputs: dict):
12
+ # Expect pre-tokenized input from client
13
+ input_ids = np.array(inputs["input_ids"], dtype=np.int64)
14
+ attention_mask = np.array(inputs["attention_mask"], dtype=np.int64)
15
 
16
+ # Run model
17
+ outputs = session.run(None, {
18
+ "input_ids": input_ids,
19
+ "attention_mask": attention_mask
20
+ })
21
 
22
+ return {"embedding": outputs[0].tolist()}