Ibraaheem committed on
Commit
fc30d1e
·
1 Parent(s): 7d09ea9

Update private_gpt/server/ingest/ingest_router.py

Browse files
private_gpt/server/ingest/ingest_router.py CHANGED
@@ -1,14 +1,15 @@
1
  from typing import Literal, List
2
-
3
- from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
4
  from pydantic import BaseModel
5
 
6
  from private_gpt.server.ingest.ingest_service import IngestService
7
  from private_gpt.server.ingest.model import IngestedDoc
8
- #from private_gpt.server.utils.auth import authenticated
9
  from private_gpt.server.utils.authentication import get_current_user
10
  ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])
11
 
 
 
12
 
13
  class IngestResponse(BaseModel):
14
  object: Literal["list"]
@@ -16,27 +17,36 @@ class IngestResponse(BaseModel):
16
  data: list[IngestedDoc]
17
 
18
 
 
19
  @ingest_router.post("/ingest", tags=["Ingestion"])
20
- def ingest(request: Request, file: UploadFile) -> IngestResponse:
21
- """Ingests and processes a file, storing its chunks to be used as context.
22
 
23
- The context obtained from files is later used in
24
- `/chat/completions`, `/completions`, and `/chunks` APIs.
25
 
26
- Most common document
27
- formats are supported, but you may be prompted to install an extra dependency to
28
- manage a specific file type.
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- A file can generate different Documents (for example a PDF generates one Document
31
- per page). All Documents IDs are returned in the response, together with the
32
- extracted Metadata (which is later used to improve context retrieval). Those IDs
33
- can be used to filter the context used to create responses in
34
- `/chat/completions`, `/completions`, and `/chunks` APIs.
35
- """
36
  service = request.state.injector.get(IngestService)
37
- if file.filename is None:
38
- raise HTTPException(400, "No file name provided")
39
- ingested_documents = service.ingest_bin_data(file.filename, file.file)
 
 
 
40
  return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
41
 
42
 
@@ -52,23 +62,30 @@ def list_ingested(request: Request) -> IngestResponse:
52
  return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
53
 
54
 
55
- @ingest_router.delete("/ingest/{file_name}", tags=["Ingestion"])
56
- def delete_ingested(request: Request, file_name: str) -> None:
57
- """Delete all ingested Documents with the specified file name.
58
 
59
- The `file_name` can be obtained from the `GET /ingest/list` endpoint.
60
- All documents with the specified file name will be effectively deleted from your storage context.
61
  """
 
62
  service = request.state.injector.get(IngestService)
63
 
64
- # Find all doc_ids with the specified file_name
65
- ingested_documents = service.list_ingested()
66
- documents_to_delete = [doc.doc_id for doc in ingested_documents if doc.doc_metadata.get("file_name") == file_name]
 
 
 
67
 
68
- # Delete all documents with the specified file_name
69
  for doc_id_to_delete in documents_to_delete:
70
  service.delete(doc_id_to_delete)
71
 
 
 
 
72
 
73
  @ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
74
  def list_ingested(request: Request) -> List[str]:
@@ -85,3 +102,4 @@ def list_ingested(request: Request) -> List[str]:
85
  unique_filenames_list = list(unique_filenames)
86
 
87
  return unique_filenames_list
 
 
1
  from typing import Literal, List
2
+ from fastapi import FastAPI
3
+ from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, Response
4
  from pydantic import BaseModel
5
 
6
  from private_gpt.server.ingest.ingest_service import IngestService
7
  from private_gpt.server.ingest.model import IngestedDoc
 
8
  from private_gpt.server.utils.authentication import get_current_user
9
  ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])
10
 
11
+ from fastapi import APIRouter, File, UploadFile, HTTPException, Depends
12
+ from fastapi.requests import Request
13
 
14
  class IngestResponse(BaseModel):
15
  object: Literal["list"]
 
17
  data: list[IngestedDoc]
18
 
19
 
20
+
21
  @ingest_router.post("/ingest", tags=["Ingestion"])
22
+ def ingest(request: Request, files: List[UploadFile]) -> IngestResponse:
23
+ """Ingests and processes files, storing their chunks to be used as context."""
24
 
25
+ # Check total file count (including existing files)
26
+ service = request.state.injector.get(IngestService)
27
 
28
+ existing_documents = service.list_ingested_filenames()
29
+
30
+ if len(existing_documents) + len(files) > 5:
31
+ raise HTTPException(403, "File limit reached. Maximum 5 files allowed.")
32
+
33
+ # Limit on number of files:
34
+ if len(files) > 5:
35
+ raise HTTPException(400, "File limit reached (maximum 5 files allowed)")
36
+
37
+ # Limit on file extensions:
38
+ allowed_extensions = {"pdf", "csv"}
39
+ for file in files:
40
+ if file.filename.lower().split(".")[-1] not in allowed_extensions:
41
+ raise HTTPException(400, "Invalid file type. Please upload PDF or CSV files only.")
42
 
 
 
 
 
 
 
43
  service = request.state.injector.get(IngestService)
44
+ ingested_documents = []
45
+ for file in files:
46
+ if file.filename is None:
47
+ raise HTTPException(400, "No file name provided")
48
+ ingested_documents.extend(service.ingest_bin_data(file.filename, file.file))
49
+
50
  return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
51
 
52
 
 
62
  return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
63
 
64
 
65
@ingest_router.delete("/ingest", tags=["Ingestion"])
def delete_ingested(request: Request, file_names: List[str]) -> Response:
    """Delete ingested Documents with the specified file names.

    Accepts a list of file names in the request body and deletes
    all documents whose `file_name` metadata matches one of them.

    Args:
        request: Incoming request; the `IngestService` is resolved from
            `request.state.injector`.
        file_names: File names whose documents should be removed.

    Returns:
        An empty `Response` with status code 204.
    """
    service = request.state.injector.get(IngestService)

    # List the stored documents once instead of once per requested name, and
    # match against a set so a name repeated in the request does not queue the
    # same document id for deletion twice.
    requested_names = set(file_names)
    documents_to_delete = [
        doc.doc_id
        for doc in service.list_ingested()
        if doc.doc_metadata.get("file_name") in requested_names
    ]

    # Delete the documents
    for doc_id_to_delete in documents_to_delete:
        service.delete(doc_id_to_delete)

    return Response(status_code=204)  # No content
87
+
88
+
89
 
90
  @ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
91
  def list_ingested(request: Request) -> List[str]:
 
102
  unique_filenames_list = list(unique_filenames)
103
 
104
  return unique_filenames_list
105
+