Update private_gpt/server/ingest/ingest_router.py
fc30d1e
from typing import Literal

from fastapi import APIRouter, Depends, HTTPException, Request, Response, UploadFile
from pydantic import BaseModel

from private_gpt.server.ingest.ingest_service import IngestService
from private_gpt.server.ingest.model import IngestedDoc
from private_gpt.server.utils.authentication import get_current_user

ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])

class IngestResponse(BaseModel):
    object: Literal["list"]
    model: Literal["private-gpt"]
    data: list[IngestedDoc]

@ingest_router.post("/ingest", tags=["Ingestion"])
def ingest(request: Request, files: list[UploadFile]) -> IngestResponse:
    """Ingests and processes files, storing their chunks to be used as context."""
    service = request.state.injector.get(IngestService)

    # Limit the total number of files (newly uploaded plus already ingested).
    existing_documents = service.list_ingested_filenames()
    if len(existing_documents) + len(files) > 5:
        raise HTTPException(403, "File limit reached. Maximum 5 files allowed.")

    # Validate file names and extensions before ingesting anything.
    allowed_extensions = {"pdf", "csv"}
    for file in files:
        if file.filename is None:
            raise HTTPException(400, "No file name provided")
        if file.filename.lower().split(".")[-1] not in allowed_extensions:
            raise HTTPException(
                400, "Invalid file type. Please upload PDF or CSV files only."
            )

    ingested_documents = []
    for file in files:
        ingested_documents.extend(service.ingest_bin_data(file.filename, file.file))
    return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
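For reference, a minimal client-side sketch of a multi-file upload against this endpoint. The base URL and the bearer-token header are assumptions for illustration, not part of this module; adjust them to whatever get_current_user actually expects.

import requests

headers = {"Authorization": "Bearer <token>"}  # assumption: bearer-token auth
with open("report.pdf", "rb") as pdf, open("data.csv", "rb") as csv_file:
    response = requests.post(
        "http://localhost:8001/v1/ingest",  # assumption: local deployment
        headers=headers,
        # Repeat the "files" field once per uploaded file.
        files=[
            ("files", ("report.pdf", pdf, "application/pdf")),
            ("files", ("data.csv", csv_file, "text/csv")),
        ],
    )
response.raise_for_status()
print(response.json()["data"])  # ingested document IDs and metadata
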
@ingest_router.get("/ingest/list", tags=["Ingestion"])
def list_ingested(request: Request) -> IngestResponse:
    """Lists already ingested Documents including their Document ID and metadata.

    Those IDs can be used to filter the context used to create responses
    in `/chat/completions`, `/completions`, and `/chunks` APIs.
    """
    service = request.state.injector.get(IngestService)
    ingested_documents = service.list_ingested()
    return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
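A quick sketch of reading this listing endpoint; the payload mirrors the IngestResponse model above. Base URL and auth header are the same assumptions as in the upload sketch.

import requests

headers = {"Authorization": "Bearer <token>"}  # assumption: bearer-token auth
listing = requests.get("http://localhost:8001/v1/ingest/list", headers=headers).json()
# Expected shape: {"object": "list", "model": "private-gpt", "data": [...]}
for doc in listing["data"]:
    print(doc["doc_id"], doc.get("doc_metadata"))
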
@ingest_router.delete("/ingest", tags=["Ingestion"])
def delete_ingested(request: Request, file_names: list[str]) -> Response:
    """Deletes ingested Documents with the specified file names.

    Accepts a list of file names in the request body and deletes
    all documents associated with those file names.
    """
    service = request.state.injector.get(IngestService)
    ingested_documents = service.list_ingested()

    # Collect the IDs of every document whose file name matches.
    documents_to_delete = [
        doc.doc_id
        for doc in ingested_documents
        if (doc.doc_metadata or {}).get("file_name") in file_names
    ]

    # Delete the matching documents.
    for doc_id_to_delete in documents_to_delete:
        service.delete(doc_id_to_delete)
    return Response(status_code=204)  # No content
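A hedged sketch of calling the delete endpoint: FastAPI reads the file_names list from the JSON request body, and a successful call returns 204 with no body. Base URL and auth header are again assumptions.

import requests

headers = {"Authorization": "Bearer <token>"}  # assumption: bearer-token auth
response = requests.delete(
    "http://localhost:8001/v1/ingest",  # assumption: local deployment
    headers=headers,
    json=["report.pdf", "data.csv"],  # file_names is read from the JSON body
)
assert response.status_code == 204  # all matching documents were deleted
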
@ingest_router.get("/ingest/list_filenames", tags=["Ingestion"])
def list_ingested_filenames(request: Request) -> list[str]:
    """Lists the unique file names of all already ingested Documents.

    These file names can be passed to the DELETE `/ingest` endpoint to remove
    every document associated with a given file.
    """
    service = request.state.injector.get(IngestService)
    ingested_documents: list[IngestedDoc] = service.list_ingested()

    # Extract the unique, non-empty file names from the documents' metadata.
    unique_filenames = {
        (doc.doc_metadata or {}).get("file_name", "") for doc in ingested_documents
    }
    unique_filenames.discard("")
    return list(unique_filenames)
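Finally, a small cleanup sketch that chains the two endpoints: fetch the unique file names, then delete every document associated with them. As in the earlier sketches, the base URL and bearer token are assumptions made for illustration.

import requests

BASE_URL = "http://localhost:8001/v1"          # assumption: local deployment
headers = {"Authorization": "Bearer <token>"}  # assumption: bearer-token auth

# List the unique file names currently ingested ...
filenames = requests.get(f"{BASE_URL}/ingest/list_filenames", headers=headers).json()

# ... and delete every document associated with them.
if filenames:
    requests.delete(f"{BASE_URL}/ingest", headers=headers, json=filenames).raise_for_status()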