import asyncio

from infinity_emb import AsyncEngineArray, EngineArgs, AsyncEmbeddingEngine
# Raw query text; English translation: "What is the capital of China?"
query = "中国的首都是哪里?"
# Candidate documents scored against the query (Chinese: "北京", "上海").
docs = ["beijing", "shanghai"]

# The instruction prefix is prepended to the query, separated by one space.
INSTRUCTION = "Query:"
query = f"{INSTRUCTION} {query}"

# Build an engine array holding a single engine configured for the reranker.
# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# model repository to run — confirm before pointing this at an untrusted repo.
array = AsyncEngineArray.from_args(
    [
        EngineArgs(
            model_name_or_path="OpenBMB/MiniCPM-Reranker-Light",
            engine="torch",
            dtype="float16",
            bettertransformer=False,
            trust_remote_code=True,
            model_warmup=False,
        )
    ]
)
async def rerank(engine: AsyncEmbeddingEngine) -> None:
    """Rerank the module-level ``docs`` against ``query`` and print the result.

    Args:
        engine: Engine used for scoring. It is entered as an async context
            manager for the duration of the call.
    """
    async with engine:
        # The second element of the result (usage) is not needed here.
        ranking, _usage = await engine.rerank(query=query, docs=docs)
        # NOTE(review): zip pairs ranking[i] with docs[i]; if engine.rerank
        # returns results sorted by score rather than in input order, the
        # pairs may not line up — confirm against the infinity_emb API.
        print(list(zip(ranking, docs)))
# Run the rerank coroutine to completion on the first engine of the array.
# Output reported in the original example:
# [(RerankReturnType(relevance_score=0.017917344, document='beijing', index=0), 'beijing'), (RerankReturnType(relevance_score=0.00024729347, document='shanghai', index=1), 'shanghai')]
asyncio.run(rerank(array[0]))