from typing import Literal

from fastapi import APIRouter, Depends, Request
from pydantic import BaseModel, Field

from private_gpt.open_ai.extensions.context_filter import ContextFilter
from private_gpt.server.chunks.chunks_service import Chunk, ChunksService
from private_gpt.server.utils.auth import authenticated

chunks_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])


class ChunksBody(BaseModel):
    text: str = Field(examples=["Q3 2023 sales"])
    # None means "search across all ingested documents".
    context_filter: ContextFilter | None = None
    # Maximum number of chunks to return.
    limit: int = 10
    # Number of neighbouring chunks to fetch before/after each match.
    prev_next_chunks: int = Field(default=0, examples=[2])
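
# For reference, a matching request body looks like this (values taken from
# the field examples and defaults above; purely illustrative):
#   {"text": "Q3 2023 sales", "limit": 10, "prev_next_chunks": 2}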


class ChunksResponse(BaseModel):
    object: Literal["list"]
    model: Literal["private-gpt"]
    data: list[Chunk]
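
# A serialized ChunksResponse looks roughly like this (field values
# illustrative; each Chunk carries the matched text, its source document,
# and a relevance score, per the endpoint docstring below):
#   {"object": "list", "model": "private-gpt",
#    "data": [{"text": "...", "score": 0.87, "document": {...}}]}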


@chunks_router.post("/chunks", tags=["Context Chunks"])
def chunks_retrieval(request: Request, body: ChunksBody) -> ChunksResponse:
    """Given a `text`, returns the most relevant chunks from the ingested documents.

    The returned information can be used to generate prompts that can be
    passed to the `/completions` or `/chat/completions` APIs. Note: this
    endpoint is usually fast, because only the embeddings model is involved,
    not the LLM. Each result contains the relevant chunk `text` together with
    the source `document` it comes from, plus a score that can be used to
    compare different results.

    The maximum number of chunks returned is set with the `limit` param.

    Previous and next chunks (pieces of text that appear right before or after
    the match in the document) can be fetched with the `prev_next_chunks` field.

    The documents searched can be restricted with `context_filter` by passing
    the document IDs to use. Ingested document IDs can be found via the
    `/ingest/list` endpoint. To search all ingested documents, omit
    `context_filter` altogether.
    """
    # Resolve the ChunksService from the application's dependency injector.
    service = request.state.injector.get(ChunksService)
    results = service.retrieve_relevant(
        body.text, body.context_filter, body.limit, body.prev_next_chunks
    )
    return ChunksResponse(
        object="list",
        model="private-gpt",
        data=results,
    )
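

# Hedged usage sketch (not part of the original module): calling this endpoint
# over HTTP with httpx. The base URL/port and the assumption that the server
# is reachable without extra auth headers are illustrative, not taken from
# this file.
if __name__ == "__main__":
    import httpx

    resp = httpx.post(
        "http://localhost:8001/v1/chunks",  # assumed local dev server
        json={"text": "Q3 2023 sales", "limit": 5, "prev_next_chunks": 2},
    )
    resp.raise_for_status()
    for chunk in resp.json()["data"]:
        # Each chunk carries a relevance score and the matched text,
        # per the Chunk/ChunksResponse models above.
        print(chunk["score"], chunk["text"][:80])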