from typing import List, Union

from fastapi.encoders import jsonable_encoder

from inference.core.devices.utils import GLOBAL_INFERENCE_SERVER_ID
from inference.core.entities.requests.inference import InferenceRequest
from inference.core.entities.responses.inference import InferenceResponse
from inference.core.env import TINY_CACHE
from inference.core.logger import logger
from inference.core.version import __version__


def to_cachable_inference_item(
    infer_request: InferenceRequest,
    infer_response: Union[InferenceResponse, List[InferenceResponse]],
) -> dict:
    """Convert a request/response pair into a JSON-serializable cache entry.

    When TINY_CACHE is enabled, only a small subset of request fields and a
    condensed form of the response are kept to reduce the cached payload size;
    otherwise the full request and response are encoded as-is.
    """
    if not TINY_CACHE:
        return {
            "inference_id": infer_request.id,
            "inference_server_version": __version__,
            "inference_server_id": GLOBAL_INFERENCE_SERVER_ID,
            "request": jsonable_encoder(infer_request),
            "response": jsonable_encoder(infer_response),
        }

    # Keep only the request fields that are useful for later inspection.
    included_request_fields = {
        "api_key",
        "confidence",
        "model_id",
        "model_type",
        "source",
        "source_info",
    }
    request = infer_request.dict(include=included_request_fields)
    response = build_condensed_response(infer_response)

    return {
        "inference_id": infer_request.id,
        "inference_server_version": __version__,
        "inference_server_id": GLOBAL_INFERENCE_SERVER_ID,
        "request": jsonable_encoder(request),
        "response": jsonable_encoder(response),
    }


def build_condensed_response(
    responses: Union[InferenceResponse, List[InferenceResponse]],
) -> List[dict]:
    """Reduce one or more inference responses to class/confidence pairs.

    Responses without predictions are skipped, and any response that cannot
    be formatted is logged and omitted rather than failing the cache write.
    """
    if not isinstance(responses, list):
        responses = [responses]

    formatted_responses = []
    for response in responses:
        if not getattr(response, "predictions", None):
            continue
        try:
            predictions = [
                {"confidence": pred.confidence, "class": pred.class_name}
                for pred in response.predictions
            ]
            formatted_responses.append(
                {
                    "predictions": predictions,
                    "time": response.time,
                }
            )
        except Exception as e:
            logger.warning(f"Error formatting response, skipping caching: {e}")

    return formatted_responses
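

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): the object below is a hypothetical
# stand-in built with SimpleNamespace, not a real InferenceResponse instance;
# it exists purely to show the condensed shape build_condensed_response
# produces when TINY_CACHE trimming is applied.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from types import SimpleNamespace

    fake_response = SimpleNamespace(
        predictions=[
            SimpleNamespace(confidence=0.92, class_name="helmet"),
            SimpleNamespace(confidence=0.47, class_name="person"),
        ],
        time=0.035,
    )
    # Expected output:
    # [{'predictions': [{'confidence': 0.92, 'class': 'helmet'},
    #                   {'confidence': 0.47, 'class': 'person'}], 'time': 0.035}]
    print(build_condensed_response(fake_response))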