Spaces:
Sleeping
Sleeping
| from typing import Literal, List | |
| from fastapi import FastAPI | |
| from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, Response | |
| from pydantic import BaseModel | |
| from private_gpt.server.ingest.ingest_service import IngestService | |
| from private_gpt.server.ingest.model import IngestedDoc | |
| from private_gpt.server.utils.authentication import get_current_user | |
# Router for the ingestion endpoints: paths are prefixed with /v1 and the
# `get_current_user` dependency is attached to the router as a whole.
ingest_router = APIRouter(
    prefix="/v1",
    dependencies=[Depends(get_current_user)],
)
| from fastapi import APIRouter, File, UploadFile, HTTPException, Depends | |
| from fastapi.requests import Request | |
class IngestResponse(BaseModel):
    """Response envelope returned by the ingestion endpoints.

    `object` and `model` are fixed marker strings; `data` carries the
    documents being reported back to the caller.
    """

    # Always the literal string "list".
    object: Literal["list"]
    # Always the literal string "private-gpt".
    model: Literal["private-gpt"]
    # The ingested documents described by this response.
    data: list[IngestedDoc]
def ingest(request: Request, files: List[UploadFile]) -> IngestResponse:
    """Ingest the uploaded files and return the documents created from them.

    Every file is validated before anything is ingested, so a request that
    is rejected by validation never leaves part of its files stored.

    Args:
        request: Incoming request; the `IngestService` is resolved from
            `request.state.injector`.
        files: Uploaded files to ingest.

    Returns:
        An `IngestResponse` whose `data` holds the documents produced for
        the uploaded files, in the order the files were given.

    Raises:
        HTTPException: 403 when already-ingested files plus this upload
            would exceed the file limit; 400 when a file has no name or its
            name does not end in `.pdf` / `.csv`.
    """
    max_files = 5
    allowed_extensions = {"pdf", "csv"}

    service = request.state.injector.get(IngestService)

    # The limit covers already-ingested files plus this upload. A separate
    # `len(files) > max_files` check would be unreachable: any such upload is
    # already rejected here.
    existing_documents = service.list_ingested_filenames()
    if len(existing_documents) + len(files) > max_files:
        raise HTTPException(
            403, f"File limit reached. Maximum {max_files} files allowed."
        )

    for file in files:
        # Check for a missing name first; calling `.lower()` on None would
        # surface as an unhandled AttributeError instead of a 400.
        if file.filename is None:
            raise HTTPException(400, "No file name provided")
        # rpartition requires an actual "." separator, so a file named just
        # "pdf" or "csv" is not mistaken for one with that extension.
        _, dot, extension = file.filename.lower().rpartition(".")
        if not dot or extension not in allowed_extensions:
            raise HTTPException(
                400, "Invalid file type. Please upload PDF or CSV files only."
            )

    ingested_documents = []
    for file in files:
        ingested_documents.extend(service.ingest_bin_data(file.filename, file.file))
    return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
def list_ingested(request: Request) -> IngestResponse:
    """Return every ingested document wrapped in an `IngestResponse`.

    The documents come straight from `IngestService.list_ingested()`. Per the
    original description, their IDs can be used to filter the context used to
    create responses in the `/chat/completions`, `/completions`, and
    `/chunks` APIs.
    """
    # NOTE(review): a second `list_ingested` is defined later in this file;
    # if both are plain module-level functions the later one rebinds the
    # name — confirm which one is meant to be exposed.
    ingest_service = request.state.injector.get(IngestService)
    return IngestResponse(
        object="list",
        model="private-gpt",
        data=ingest_service.list_ingested(),
    )
def delete_ingested(request: Request, file_names: List[str]) -> Response:
    """Delete every ingested document that belongs to the given file names.

    Args:
        request: Incoming request; the `IngestService` is resolved from
            `request.state.injector`.
        file_names: File names whose documents should be removed. Names
            that match no ingested document are ignored.

    Returns:
        An empty response with status 204 (No Content).
    """
    service = request.state.injector.get(IngestService)

    # A set collapses repeated names, so no document is scheduled for
    # deletion twice, and makes the membership test below O(1).
    requested_names = set(file_names)

    # The document listing does not depend on the name being looked up, so
    # fetch it once instead of once per file name.
    # NOTE(review): `doc_metadata` is guarded against None defensively —
    # confirm against the IngestedDoc model whether it can be absent.
    documents_to_delete = [
        doc.doc_id
        for doc in service.list_ingested()
        if (doc.doc_metadata or {}).get("file_name") in requested_names
    ]

    for doc_id_to_delete in documents_to_delete:
        service.delete(doc_id_to_delete)
    return Response(status_code=204)  # No content
def list_ingested(request: Request) -> List[str]:
    """Return the distinct file names of the ingested documents, sorted.

    Only file names are returned, not document IDs or metadata. Documents
    whose metadata carries no file name contribute a single empty string.

    Args:
        request: Incoming request; the `IngestService` is resolved from
            `request.state.injector`.

    Returns:
        The unique `file_name` metadata values in ascending order, so the
        result is stable from one call to the next.
    """
    # NOTE(review): `list_ingested` is also defined earlier in this file with
    # a different return type; if both are plain module-level functions this
    # later definition is the one bound to the name — confirm the intent.
    service = request.state.injector.get(IngestService)
    ingested_documents: List[IngestedDoc] = service.list_ingested()

    # `or ""` maps a missing or None file name to "", keeping every element
    # a string so the result matches List[str] and can be sorted.
    # NOTE(review): `doc_metadata` is guarded against None defensively —
    # confirm against the IngestedDoc model whether it can be absent.
    unique_filenames = {
        (doc.doc_metadata or {}).get("file_name") or ""
        for doc in ingested_documents
    }
    return sorted(unique_filenames)