Spaces:
Sleeping
Sleeping
File size: 4,099 Bytes
b96229e fc30d1e bf6d237 b96229e bf6d237 fc30d1e bf6d237 fc30d1e bf6d237 fc30d1e bf6d237 fc30d1e bf6d237 fc30d1e bf6d237 fc30d1e bf6d237 fc30d1e b96229e fc30d1e b96229e fc30d1e b96229e fc30d1e b96229e fc30d1e b96229e fc30d1e b96229e bf6d237 b96229e bf6d237 b96229e fc30d1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
from typing import Literal, List
from fastapi import FastAPI
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, Response
from pydantic import BaseModel
from private_gpt.server.ingest.ingest_service import IngestService
from private_gpt.server.ingest.model import IngestedDoc
from private_gpt.server.utils.authentication import get_current_user
# Router for the ingestion endpoints: paths are prefixed with "/v1" and the
# router declares `get_current_user` as a dependency.
ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])
from fastapi import APIRouter, File, UploadFile, HTTPException, Depends
from fastapi.requests import Request
# Response envelope returned by the ingestion endpoints.
# Documented with `#` comments instead of a class docstring so that the
# generated schema of the model stays exactly as it was.
class IngestResponse(BaseModel):
    # Fixed discriminator tags: only these exact literal values validate.
    object: Literal["list"]
    model: Literal["private-gpt"]
    # Documents carried by the response.
    data: List[IngestedDoc]
# Upper bound on the number of files in one upload request and on the total
# of already-ingested plus newly-uploaded files.
_MAX_INGESTED_FILES = 5
# Lower-case file extensions (without the dot) accepted for ingestion.
_ALLOWED_EXTENSIONS = frozenset({"pdf", "csv"})


@ingest_router.post("/ingest", tags=["Ingestion"])
def ingest(request: Request, files: List[UploadFile]) -> IngestResponse:
    """Ingests and processes files, storing their chunks to be used as context.

    The request is validated on its own first (file count, file names,
    extensions) and only then checked against the files that are already
    ingested, so malformed uploads are rejected without touching the service.

    Args:
        request: Incoming request; the `IngestService` is resolved from
            `request.state.injector`.
        files: Uploaded files to ingest.

    Returns:
        An `IngestResponse` whose `data` holds the documents produced by
        ingesting every uploaded file.

    Raises:
        HTTPException: 400 if more than the maximum number of files is
            uploaded at once, if a file has no name, or if a file does not
            carry a `.pdf` / `.csv` extension; 403 if the upload would push
            the total number of ingested files over the maximum.
    """
    # Per-request limit. This check used to sit behind the total-count check,
    # which made it unreachable (any request over the limit already tripped
    # the 403 below).
    if len(files) > _MAX_INGESTED_FILES:
        raise HTTPException(
            400, f"File limit reached (maximum {_MAX_INGESTED_FILES} files allowed)"
        )

    # Validate every upload before ingesting anything, so a bad file late in
    # the list cannot leave earlier ones ingested.
    uploads = []
    for file in files:
        # The name must be checked before it is inspected: calling `.lower()`
        # on a missing name would raise AttributeError instead of a 400.
        if file.filename is None:
            raise HTTPException(400, "No file name provided")
        # `rpartition` yields an empty separator when there is no dot, so a
        # file literally named "pdf" is not mistaken for a PDF.
        _stem, dot, extension = file.filename.rpartition(".")
        if not dot or extension.lower() not in _ALLOWED_EXTENSIONS:
            raise HTTPException(400, "Invalid file type. Please upload PDF or CSV files only.")
        uploads.append((file.filename, file.file))

    service = request.state.injector.get(IngestService)

    # Check total file count (including existing files).
    existing_documents = service.list_ingested_filenames()
    if len(existing_documents) + len(uploads) > _MAX_INGESTED_FILES:
        raise HTTPException(
            403, f"File limit reached. Maximum {_MAX_INGESTED_FILES} files allowed."
        )

    ingested_documents = []
    for file_name, file_data in uploads:
        ingested_documents.extend(service.ingest_bin_data(file_name, file_data))
    return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
@ingest_router.get("/ingest/list", tags=["Ingestion"])
def list_ingested(request: Request) -> IngestResponse:
    """Lists the Documents ingested so far, with their Document ID and metadata.

    The returned IDs can be used to filter the context used to create
    responses in the `/chat/completions`, `/completions`, and `/chunks` APIs.
    """
    ingest_service = request.state.injector.get(IngestService)
    return IngestResponse(
        object="list",
        model="private-gpt",
        data=ingest_service.list_ingested(),
    )
@ingest_router.delete("/ingest", tags=["Ingestion"])
def delete_ingested(request: Request, file_names: List[str]) -> Response:
    """Deletes ingested Documents with the specified file names.

    Accepts a list of file names in the request body and deletes
    all documents whose `file_name` metadata matches one of them.

    Args:
        request: Incoming request; the `IngestService` is resolved from
            `request.state.injector`.
        file_names: File names whose documents should be removed. Duplicate
            names are collapsed, and names without a matching document are
            ignored.

    Returns:
        An empty response with status code 204.
    """
    service = request.state.injector.get(IngestService)

    # A set removes duplicate names, so no document id is collected (and
    # deleted) twice, and makes each membership test O(1).
    wanted_names = set(file_names)
    if wanted_names:
        # Fetch the ingested documents once; the listing does not depend on
        # which file name is being matched.
        # NOTE(review): `or {}` assumes `doc_metadata` may be None on some
        # documents - confirm against the IngestedDoc model.
        documents_to_delete = [
            doc.doc_id
            for doc in service.list_ingested()
            if (doc.doc_metadata or {}).get("file_name") in wanted_names
        ]
        for doc_id_to_delete in documents_to_delete:
            service.delete(doc_id_to_delete)

    return Response(status_code=204)  # No content
# NOTE(review): this handler reuses the name `list_ingested` of the
# `/ingest/list` handler defined earlier in this module. Both routes stay
# registered (the decorators ran on each function), but the module-level name
# ends up bound to this one. Consider renaming it, e.g. to
# `list_ingested_filenames`, once callers/OpenAPI clients have been checked.
@ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
def list_ingested(request: Request) -> List[str]:
    """Lists the unique file names of the already ingested Documents.

    Every ingested document contributes the `file_name` entry of its
    metadata; documents without one contribute an empty string. Each name
    appears once, and the names are returned in sorted order.
    """
    service = request.state.injector.get(IngestService)
    ingested_documents: List[IngestedDoc] = service.list_ingested()
    # NOTE(review): `or {}` assumes `doc_metadata` may be None on some
    # documents - confirm against the IngestedDoc model.
    unique_filenames = {
        (doc.doc_metadata or {}).get("file_name", "") for doc in ingested_documents
    }
    # Set iteration order is arbitrary; sorting keeps the response stable
    # between calls.
    return sorted(unique_filenames)
|