Spaces:
Sleeping
Sleeping
Update private_gpt/server/ingest/ingest_router.py
Browse files
private_gpt/server/ingest/ingest_router.py
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
from typing import Literal, List
|
2 |
-
|
3 |
-
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
|
4 |
from pydantic import BaseModel
|
5 |
|
6 |
from private_gpt.server.ingest.ingest_service import IngestService
|
7 |
from private_gpt.server.ingest.model import IngestedDoc
|
8 |
-
#from private_gpt.server.utils.auth import authenticated
|
9 |
from private_gpt.server.utils.authentication import get_current_user
|
10 |
ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])
|
11 |
|
|
|
|
|
12 |
|
13 |
class IngestResponse(BaseModel):
|
14 |
object: Literal["list"]
|
@@ -16,27 +17,36 @@ class IngestResponse(BaseModel):
|
|
16 |
data: list[IngestedDoc]
|
17 |
|
18 |
|
|
|
19 |
@ingest_router.post("/ingest", tags=["Ingestion"])
|
20 |
-
def ingest(request: Request,
|
21 |
-
"""Ingests and processes
|
22 |
|
23 |
-
|
24 |
-
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
A file can generate different Documents (for example a PDF generates one Document
|
31 |
-
per page). All Documents IDs are returned in the response, together with the
|
32 |
-
extracted Metadata (which is later used to improve context retrieval). Those IDs
|
33 |
-
can be used to filter the context used to create responses in
|
34 |
-
`/chat/completions`, `/completions`, and `/chunks` APIs.
|
35 |
-
"""
|
36 |
service = request.state.injector.get(IngestService)
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
40 |
return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
|
41 |
|
42 |
|
@@ -52,23 +62,30 @@ def list_ingested(request: Request) -> IngestResponse:
|
|
52 |
return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
|
53 |
|
54 |
|
55 |
-
@ingest_router.delete("/ingest
|
56 |
-
def delete_ingested(request: Request,
|
57 |
-
"""
|
58 |
|
59 |
-
|
60 |
-
|
61 |
"""
|
|
|
62 |
service = request.state.injector.get(IngestService)
|
63 |
|
64 |
-
# Find
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
67 |
|
68 |
-
# Delete
|
69 |
for doc_id_to_delete in documents_to_delete:
|
70 |
service.delete(doc_id_to_delete)
|
71 |
|
|
|
|
|
|
|
72 |
|
73 |
@ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
|
74 |
def list_ingested(request: Request) -> List[str]:
|
@@ -85,3 +102,4 @@ def list_ingested(request: Request) -> List[str]:
|
|
85 |
unique_filenames_list = list(unique_filenames)
|
86 |
|
87 |
return unique_filenames_list
|
|
|
|
1 |
from typing import Literal, List
|
2 |
+
from fastapi import FastAPI
|
3 |
+
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, Response
|
4 |
from pydantic import BaseModel
|
5 |
|
6 |
from private_gpt.server.ingest.ingest_service import IngestService
|
7 |
from private_gpt.server.ingest.model import IngestedDoc
|
|
|
8 |
from private_gpt.server.utils.authentication import get_current_user
|
9 |
ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])
|
10 |
|
11 |
+
from fastapi import APIRouter, File, UploadFile, HTTPException, Depends
|
12 |
+
from fastapi.requests import Request
|
13 |
|
14 |
class IngestResponse(BaseModel):
|
15 |
object: Literal["list"]
|
|
|
17 |
data: list[IngestedDoc]
|
18 |
|
19 |
|
20 |
+
|
21 |
@ingest_router.post("/ingest", tags=["Ingestion"])
def ingest(request: Request, files: List[UploadFile]) -> IngestResponse:
    """Ingest the uploaded files, storing their chunks to be used as context.

    Every upload is validated before anything is ingested, so a rejected
    request leaves the store untouched.

    Args:
        request: Incoming request; the ``IngestService`` is resolved from
            ``request.state.injector``.
        files: Uploaded files to ingest.

    Returns:
        An ``IngestResponse`` listing every document produced by the uploads.

    Raises:
        HTTPException: 400 if more than 5 files are sent in one request, if an
            upload has no file name, or if its extension is not PDF/CSV;
            403 if the upload would push the stored total above 5 files.
    """
    max_files = 5
    allowed_extensions = {"pdf", "csv"}

    # Reject an oversized request first. In the previous ordering this check
    # came after the combined-total check and could never be reached.
    if len(files) > max_files:
        raise HTTPException(400, "File limit reached (maximum 5 files allowed)")

    service = request.state.injector.get(IngestService)

    # Check total file count (including existing files).
    # NOTE(review): assumes list_ingested_filenames() returns one entry per
    # stored file - confirm against IngestService.
    existing_documents = service.list_ingested_filenames()
    if len(existing_documents) + len(files) > max_files:
        raise HTTPException(403, "File limit reached. Maximum 5 files allowed.")

    for file in files:
        # The name must be checked before it is inspected; calling .lower()
        # on a missing name would surface as a 500 instead of a 400.
        if file.filename is None:
            raise HTTPException(400, "No file name provided")
        # rpartition distinguishes "report.pdf" from a bare "pdf" with no
        # extension, which a plain split(".")[-1] would wrongly accept.
        _, dot, extension = file.filename.rpartition(".")
        if not dot or extension.lower() not in allowed_extensions:
            raise HTTPException(
                400, "Invalid file type. Please upload PDF or CSV files only."
            )

    ingested_documents = []
    for file in files:
        ingested_documents.extend(service.ingest_bin_data(file.filename, file.file))
    return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
|
51 |
|
52 |
|
|
|
62 |
return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
|
63 |
|
64 |
|
65 |
+
@ingest_router.delete("/ingest", tags=["Ingestion"])
def delete_ingested(request: Request, file_names: List[str]) -> Response:
    """Delete ingested Documents with the specified file names.

    Accepts a list of file names in the request body and deletes
    all documents associated with those file names.

    Args:
        request: Incoming request; the ``IngestService`` is resolved from
            ``request.state.injector``.
        file_names: File names whose documents should be removed. Duplicates
            are ignored.

    Returns:
        An empty ``204 No Content`` response.
    """
    service = request.state.injector.get(IngestService)

    # De-duplicate the requested names so a repeated name cannot queue the
    # same document for deletion twice.
    names_to_delete = set(file_names)

    if names_to_delete:
        # List the ingested documents once instead of once per file name.
        # NOTE(review): doc_metadata is guarded with `or {}` in case a
        # document carries no metadata - confirm against IngestedDoc.
        documents_to_delete = [
            doc.doc_id
            for doc in service.list_ingested()
            if (doc.doc_metadata or {}).get("file_name") in names_to_delete
        ]

        # Delete the documents
        for doc_id_to_delete in documents_to_delete:
            service.delete(doc_id_to_delete)

    return Response(status_code=204)  # No content
|
87 |
+
|
88 |
+
|
89 |
|
90 |
@ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
|
91 |
def list_ingested(request: Request) -> List[str]:
|
|
|
102 |
unique_filenames_list = list(unique_filenames)
|
103 |
|
104 |
return unique_filenames_list
|
105 |
+
|