Spaces:

Ibraaheem
/

invenxion-chatbot

Sleeping

App Files Files Community

Ibraaheem committed on Dec 26, 2023

Commit

fc30d1e

1 Parent(s): 7d09ea9

Update private_gpt/server/ingest/ingest_router.py

Browse files

Files changed (1) hide show

private_gpt/server/ingest/ingest_router.py +46 -28

private_gpt/server/ingest/ingest_router.py CHANGED Viewed

@@ -1,14 +1,15 @@
 from typing import Literal, List
-from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
 from pydantic import BaseModel
 from private_gpt.server.ingest.ingest_service import IngestService
 from private_gpt.server.ingest.model import IngestedDoc
-#from private_gpt.server.utils.auth import authenticated
 from private_gpt.server.utils.authentication import get_current_user
 ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])
 class IngestResponse(BaseModel):
     object: Literal["list"]
@@ -16,27 +17,36 @@ class IngestResponse(BaseModel):
     data: list[IngestedDoc]
 @ingest_router.post("/ingest", tags=["Ingestion"])
-def ingest(request: Request, file: UploadFile) -> IngestResponse:
-    """Ingests and processes a file, storing its chunks to be used as context.
-    The context obtained from files is later used in
-    `/chat/completions`, `/completions`, and `/chunks` APIs.
-    Most common document
-    formats are supported, but you may be prompted to install an extra dependency to
-    manage a specific file type.
-    A file can generate different Documents (for example a PDF generates one Document
-    per page). All Documents IDs are returned in the response, together with the
-    extracted Metadata (which is later used to improve context retrieval). Those IDs
-    can be used to filter the context used to create responses in
-    `/chat/completions`, `/completions`, and `/chunks` APIs.
-    """
     service = request.state.injector.get(IngestService)
-    if file.filename is None:
-        raise HTTPException(400, "No file name provided")
-    ingested_documents = service.ingest_bin_data(file.filename, file.file)
     return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
@@ -52,23 +62,30 @@ def list_ingested(request: Request) -> IngestResponse:
     return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
-@ingest_router.delete("/ingest/{file_name}", tags=["Ingestion"])
-def delete_ingested(request: Request, file_name: str) -> None:
-    """Delete all ingested Documents with the specified file name.
-    The `file_name` can be obtained from the `GET /ingest/list` endpoint.
-    All documents with the specified file name will be effectively deleted from your storage context.
     """
     service = request.state.injector.get(IngestService)
-    # Find all doc_ids with the specified file_name
-    ingested_documents = service.list_ingested()
-    documents_to_delete = [doc.doc_id for doc in ingested_documents if doc.doc_metadata.get("file_name") == file_name]
-    # Delete all documents with the specified file_name
     for doc_id_to_delete in documents_to_delete:
         service.delete(doc_id_to_delete)
 @ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
 def list_ingested(request: Request) -> List[str]:
@@ -85,3 +102,4 @@ def list_ingested(request: Request) -> List[str]:
     unique_filenames_list = list(unique_filenames)
     return unique_filenames_list

 from typing import Literal, List
+from fastapi import FastAPI
+from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, Response
 from pydantic import BaseModel
 from private_gpt.server.ingest.ingest_service import IngestService
 from private_gpt.server.ingest.model import IngestedDoc
 from private_gpt.server.utils.authentication import get_current_user
 ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])
+from fastapi import APIRouter, File, UploadFile, HTTPException, Depends
+from fastapi.requests import Request
 class IngestResponse(BaseModel):
     object: Literal["list"]
     data: list[IngestedDoc]
 @ingest_router.post("/ingest", tags=["Ingestion"])
+def ingest(request: Request, files: List[UploadFile]) -> IngestResponse:
+    """Ingests and processes files, storing their chunks to be used as context."""
+    # Check total file count (including existing files)
+    service = request.state.injector.get(IngestService)
+    existing_documents = service.list_ingested_filenames()
+    if len(existing_documents) + len(files) > 5:
+        raise HTTPException(403, "File limit reached. Maximum 5 files allowed.")
+    # Limit on number of files:
+    if len(files) > 5:
+        raise HTTPException(400, "File limit reached (maximum 5 files allowed)")
+    # Limit on file extensions:
+    allowed_extensions = {"pdf", "csv"}
+    for file in files:
+        if file.filename.lower().split(".")[-1] not in allowed_extensions:
+            raise HTTPException(400, "Invalid file type. Please upload PDF or CSV files only.")
     service = request.state.injector.get(IngestService)
+    ingested_documents = []
+    for file in files:
+        if file.filename is None:
+            raise HTTPException(400, "No file name provided")
+        ingested_documents.extend(service.ingest_bin_data(file.filename, file.file))
     return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
     return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
@ingest_router.delete("/ingest", tags=["Ingestion"])
def delete_ingested(request: Request, file_names: List[str]) -> Response:
    """Delete all ingested Documents that belong to the given file names.

    Args:
        request: Incoming request; its ``state.injector`` provides the
            ``IngestService``.
        file_names: File names whose documents should be removed. A document
            matches when its ``doc_metadata["file_name"]`` equals one of them.

    Returns:
        An empty ``204 No Content`` response, including when nothing matched.
    """
    service = request.state.injector.get(IngestService)

    # A set gives constant-time membership tests and collapses duplicate names
    # in the request, so no doc_id is queued for deletion twice.
    requested_names = set(file_names)

    # List the stored documents once; the listing does not depend on the
    # requested name, so there is no need to repeat it per file name.
    documents_to_delete = [
        doc.doc_id
        for doc in service.list_ingested()
        if doc.doc_metadata.get("file_name") in requested_names
    ]

    for doc_id_to_delete in documents_to_delete:
        service.delete(doc_id_to_delete)
    return Response(status_code=204)  # No content
 @ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
 def list_ingested(request: Request) -> List[str]:
     unique_filenames_list = list(unique_filenames)
     return unique_filenames_list