import asyncio
from infinity_emb import AsyncEngineArray, EngineArgs, AsyncEmbeddingEngine
query = "中国的首都是哪里?" # "What is the capital of China?"
docs = ["beijing", "shanghai"] # "北京", "上海"

INSTRUCTION = "Query:"
query = f"{INSTRUCTION} {query}"

array = AsyncEngineArray.from_args(
  [EngineArgs(model_name_or_path = "OpenBMB/MiniCPM-Reranker-Light", engine="torch", dtype="float16", bettertransformer=False, trust_remote_code=True, model_warmup=False)]
)

async def rerank(engine: AsyncEmbeddingEngine): 
    async with engine:
        ranking, usage = await engine.rerank(query=query, docs=docs)
        print(list(zip(ranking, docs)))

asyncio.run(rerank(array[0])) # [(RerankReturnType(relevance_score=0.017917344, document='beijing', index=0), 'beijing'), (RerankReturnType(relevance_score=0.00024729347, document='shanghai', index=1), 'shanghai')]