File size: 4,099 Bytes
b96229e
fc30d1e
 
bf6d237
 
 
 
b96229e
 
bf6d237
fc30d1e
 
bf6d237
 
 
 
 
 
 
fc30d1e
bf6d237
fc30d1e
 
bf6d237
fc30d1e
 
bf6d237
fc30d1e
 
 
 
 
 
 
 
 
 
 
 
 
 
bf6d237
 
fc30d1e
 
 
 
 
 
bf6d237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc30d1e
 
 
b96229e
fc30d1e
 
b96229e
fc30d1e
b96229e
 
fc30d1e
 
 
 
 
 
b96229e
fc30d1e
b96229e
 
 
fc30d1e
 
 
b96229e
 
 
 
bf6d237
b96229e
 
bf6d237
 
b96229e
 
 
 
 
 
 
fc30d1e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from typing import Literal, List
from fastapi import FastAPI
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, Response
from pydantic import BaseModel

from private_gpt.server.ingest.ingest_service import IngestService
from private_gpt.server.ingest.model import IngestedDoc
from private_gpt.server.utils.authentication import get_current_user
# Router for the ingestion endpoints: every route is served under "/v1" and
# declares get_current_user as a dependency.
ingest_router = APIRouter(
    prefix="/v1",
    dependencies=[Depends(get_current_user)],
)

from fastapi import APIRouter, File, UploadFile, HTTPException, Depends
from fastapi.requests import Request

class IngestResponse(BaseModel):
    # Response envelope returned by the `/ingest` and `/ingest/list` handlers
    # in this module.

    # Constant tag: only the literal "list" is accepted.
    object: Literal["list"]
    # Constant tag: only the literal "private-gpt" is accepted.
    model: Literal["private-gpt"]
    # The ingested documents carried by this response.
    data: list[IngestedDoc]



@ingest_router.post("/ingest", tags=["Ingestion"])
def ingest(request: Request, files: List[UploadFile]) -> IngestResponse:
    """Ingests and processes files, storing their chunks to be used as context.

    All request validation runs before the first file is handed to the
    ingest service.

    Args:
        request: Incoming request; ``request.state.injector`` supplies the
            ``IngestService`` instance.
        files: The uploaded files. Only ``.pdf`` and ``.csv`` names are
            accepted (case-insensitive).

    Returns:
        An ``IngestResponse`` whose ``data`` holds every document produced by
        ingesting ``files``.

    Raises:
        HTTPException: 400 when more than 5 files are sent in one request,
            when a file has no name, or when a file name does not end in an
            allowed extension; 403 when the upload would push the number of
            stored files above 5.
    """
    # The limit is also spelled out in the error messages below; keep them
    # in sync if this value changes.
    max_files = 5
    allowed_extensions = {"pdf", "csv"}

    # Per-request cap comes first: checked after the total-count test it could
    # never trigger, because an oversized request always exceeds the total too.
    if len(files) > max_files:
        raise HTTPException(400, "File limit reached (maximum 5 files allowed)")

    for file in files:
        # The name must be checked before it is lower-cased; otherwise a
        # missing name surfaces as an AttributeError instead of a 400.
        if file.filename is None:
            raise HTTPException(400, "No file name provided")
        # Require an actual "." so a file literally named "pdf" or "csv"
        # (no extension at all) is rejected.
        _, dot, extension = file.filename.lower().rpartition(".")
        if not dot or extension not in allowed_extensions:
            raise HTTPException(400, "Invalid file type. Please upload PDF or CSV files only.")

    service = request.state.injector.get(IngestService)

    # Total cap, counting files that are already stored.
    # NOTE(review): list_ingested_filenames() is not defined in this module;
    # the /ingest/list_filenames handler derives names from list_ingested()
    # instead. Confirm IngestService really exposes this method.
    existing_documents = service.list_ingested_filenames()
    if len(existing_documents) + len(files) > max_files:
        raise HTTPException(403, "File limit reached. Maximum 5 files allowed.")

    ingested_documents = []
    for file in files:
        ingested_documents.extend(service.ingest_bin_data(file.filename, file.file))

    return IngestResponse(object="list", model="private-gpt", data=ingested_documents)


@ingest_router.get("/ingest/list", tags=["Ingestion"])
def list_ingested(request: Request) -> IngestResponse:
    """Returns every ingested Document together with its Document ID and metadata.

    The Document IDs can be used to filter the context used to create
    responses in the `/chat/completions`, `/completions`, and `/chunks` APIs.
    """
    ingest_service = request.state.injector.get(IngestService)
    return IngestResponse(
        object="list",
        model="private-gpt",
        data=ingest_service.list_ingested(),
    )


@ingest_router.delete("/ingest", tags=["Ingestion"], status_code=204)
def delete_ingested(request: Request, file_names: List[str]) -> Response:
    """Deletes ingested Documents with the specified file names.

    Accepts a list of file names in the request body and deletes
    all documents whose ``file_name`` metadata matches one of them.
    Names that match no document are ignored.

    Args:
        request: Incoming request; ``request.state.injector`` supplies the
            ``IngestService`` instance.
        file_names: File names whose documents should be removed. Duplicate
            entries are collapsed so no document is deleted twice.

    Returns:
        An empty ``Response`` with status code 204.
    """
    service = request.state.injector.get(IngestService)

    # A set gives O(1) membership tests and collapses repeated names, so each
    # matching document id is collected exactly once.
    requested_names = set(file_names)

    # Fetch the document list once rather than once per requested name.
    # `or {}` tolerates documents without metadata
    # (NOTE(review): confirm whether IngestedDoc.doc_metadata can be None).
    doc_ids_to_delete = [
        doc.doc_id
        for doc in service.list_ingested()
        if (doc.doc_metadata or {}).get("file_name") in requested_names
    ]

    for doc_id in doc_ids_to_delete:
        service.delete(doc_id)

    return Response(status_code=204)  # No content



# NOTE(review): this handler reuses the name of the `/ingest/list` handler
# defined earlier in this module. Both routes are registered when their
# decorators run, but the module-level name `list_ingested` ends up bound to
# this function. The name is kept so existing importers are unaffected;
# consider renaming one of the two.
@ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
def list_ingested(request: Request) -> List[str]:
    """Lists the distinct file names of the already ingested Documents.

    Each name is taken from the ``file_name`` entry of a Document's metadata
    and appears once in the result, however many Documents share it.
    Documents without a ``file_name`` entry contribute an empty string.

    Args:
        request: Incoming request; ``request.state.injector`` supplies the
            ``IngestService`` instance.

    Returns:
        The unique file names, sorted so the response order is stable
        between calls.
    """
    service = request.state.injector.get(IngestService)
    ingested_documents: List[IngestedDoc] = service.list_ingested()

    # `or {}` tolerates documents without metadata
    # (NOTE(review): confirm whether IngestedDoc.doc_metadata can be None).
    unique_filenames = {
        (doc.doc_metadata or {}).get("file_name", "") for doc in ingested_documents
    }

    # Set iteration order is arbitrary; sort for a deterministic response.
    return sorted(unique_filenames)