|
from typing import Literal, List |
|
from fastapi import FastAPI |
|
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, Response |
|
from pydantic import BaseModel |
|
|
|
from private_gpt.server.ingest.ingest_service import IngestService |
|
from private_gpt.server.ingest.model import IngestedDoc |
|
from private_gpt.server.utils.authentication import get_current_user |
|
# Router for the ingestion endpoints. Every route is served under the "/v1"
# prefix and runs the `get_current_user` dependency before its handler
# (presumably enforcing authentication -- confirm in the authentication module).
ingest_router = APIRouter(
    prefix="/v1",
    dependencies=[Depends(get_current_user)],
)
|
|
|
from fastapi import APIRouter, File, UploadFile, HTTPException, Depends |
|
from fastapi.requests import Request |
|
|
|
class IngestResponse(BaseModel):
    """Response envelope returned by the ingestion endpoints.

    `object` and `model` are fixed literal markers; `data` carries the
    ingested documents themselves.
    """

    # Always the literal "list".
    object: Literal["list"]
    # Always the literal "private-gpt".
    model: Literal["private-gpt"]
    # The documents being reported back to the caller.
    data: list[IngestedDoc]
|
|
|
|
|
|
|
# Maximum number of files allowed in total (already ingested + newly uploaded).
_MAX_INGESTED_FILES = 5
# Accepted file extensions, lower-case and without the leading dot.
_ALLOWED_EXTENSIONS = frozenset({"pdf", "csv"})


@ingest_router.post("/ingest", tags=["Ingestion"])
def ingest(request: Request, files: List[UploadFile]) -> IngestResponse:
    """Ingests and processes files, storing their chunks to be used as context.

    Every upload is validated before any file is handed to the ingest
    service, so a request that fails validation ingests nothing.

    Args:
        request: Incoming request; `request.state.injector` supplies the
            `IngestService` instance.
        files: Uploaded files to ingest.

    Returns:
        An `IngestResponse` whose `data` holds the documents produced by
        ingesting all uploaded files, in upload order.

    Raises:
        HTTPException: 403 when the already ingested file names plus the new
            uploads would exceed the file limit; 400 when an upload has no
            file name or its extension is not one of the allowed ones.
    """
    service = request.state.injector.get(IngestService)

    # The limit applies to the total. Because the existing count is never
    # negative, this single check also rejects any request that uploads more
    # than the limit on its own.
    already_ingested = service.list_ingested_filenames()
    if len(already_ingested) + len(files) > _MAX_INGESTED_FILES:
        raise HTTPException(
            403, f"File limit reached. Maximum {_MAX_INGESTED_FILES} files allowed."
        )

    uploads = []
    for file in files:
        # The name must be checked before it is inspected; calling string
        # methods on a missing name would fail with an AttributeError.
        if file.filename is None:
            raise HTTPException(400, "No file name provided")
        # Require a real "name.ext" form: a bare name such as "pdf" has no
        # extension and must not pass as a PDF.
        _, dot, extension = file.filename.rpartition(".")
        if not dot or extension.lower() not in _ALLOWED_EXTENSIONS:
            raise HTTPException(
                400, "Invalid file type. Please upload PDF or CSV files only."
            )
        uploads.append((file.filename, file.file))

    ingested_documents = []
    for filename, stream in uploads:
        ingested_documents.extend(service.ingest_bin_data(filename, stream))

    return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
|
|
|
|
|
@ingest_router.get("/ingest/list", tags=["Ingestion"])
def list_ingested(request: Request) -> IngestResponse:
    """Return every document known to the ingest service.

    The documents come straight from `IngestService.list_ingested()` and are
    wrapped in an `IngestResponse`. Per the API description, each entry
    carries a Document ID and metadata, and those IDs can be used to filter
    the context used to create responses in `/chat/completions`,
    `/completions`, and `/chunks` APIs.
    """
    ingest_service = request.state.injector.get(IngestService)
    return IngestResponse(
        object="list",
        model="private-gpt",
        data=ingest_service.list_ingested(),
    )
|
|
|
|
|
@ingest_router.delete("/ingest", tags=["Ingestion"])
def delete_ingested(request: Request, file_names: List[str]) -> Response:
    """Deletes ingested Documents with the specified file names.

    Accepts a list of file names in the request body and deletes
    all documents associated with those file names.

    Args:
        request: Incoming request; `request.state.injector` supplies the
            `IngestService` instance.
        file_names: File names whose documents should be removed. A document
            matches when the `file_name` entry of its metadata equals one of
            these names.

    Returns:
        An empty response with status code 204, whether or not any document
        matched.
    """
    service = request.state.injector.get(IngestService)

    # A set collapses duplicate names, so each matching document is selected
    # (and deleted) exactly once.
    requested_names = set(file_names)

    # List the documents once; the listing does not depend on the file name
    # being matched.
    # NOTE(review): `doc_metadata` is treated as possibly None here -- confirm
    # against the IngestedDoc model.
    doc_ids_to_delete = [
        doc.doc_id
        for doc in service.list_ingested()
        if (doc.doc_metadata or {}).get("file_name") in requested_names
    ]

    for doc_id in doc_ids_to_delete:
        service.delete(doc_id)

    return Response(status_code=204)
|
|
|
|
|
|
|
# NOTE(review): this handler reuses the name `list_ingested`, which is also the
# name of the `/ingest/list` handler defined earlier in this module, so the
# module-level name ends up bound to this function. Both routes are registered
# when their decorators run, but consider renaming this one (for example to
# `list_ingested_filenames`) once callers and generated clients are checked.
@ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
def list_ingested(request: Request) -> List[str]:
    """Lists the distinct file names of the already ingested Documents.

    Each document contributes the `file_name` entry of its metadata; a
    document without that entry contributes an empty string.

    Args:
        request: Incoming request; `request.state.injector` supplies the
            `IngestService` instance.

    Returns:
        The unique file names, sorted so that the response order is stable
        between calls.
    """
    service = request.state.injector.get(IngestService)
    ingested_documents: List[IngestedDoc] = service.list_ingested()

    # NOTE(review): `doc_metadata` is treated as possibly None here -- confirm
    # against the IngestedDoc model.
    unique_filenames = {
        (doc.doc_metadata or {}).get("file_name", "") for doc in ingested_documents
    }

    # Set iteration order is arbitrary; sorting makes the output deterministic.
    return sorted(unique_filenames)
|
|
|
|