Spaces:

Ibraaheem
/

invenxion-chatbot

Sleeping

App Files Files Community

invenxion-chatbot / private_gpt /server /ingest /ingest_router.py

Ibraaheem

Update private_gpt/server/ingest/ingest_router.py

fc30d1e almost 2 years ago

raw

history blame

4.1 kB

	from typing import Literal, List
	from fastapi import FastAPI
	from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, Response
	from pydantic import BaseModel

	from private_gpt.server.ingest.ingest_service import IngestService
	from private_gpt.server.ingest.model import IngestedDoc
	from private_gpt.server.utils.authentication import get_current_user
	ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])

	from fastapi import APIRouter, File, UploadFile, HTTPException, Depends
	from fastapi.requests import Request

	class IngestResponse(BaseModel):
	object: Literal["list"]
	model: Literal["private-gpt"]
	data: list[IngestedDoc]



	@ingest_router.post("/ingest", tags=["Ingestion"])
	def ingest(request: Request, files: List[UploadFile]) -> IngestResponse:
	"""Ingests and processes files, storing their chunks to be used as context."""

	# Check total file count (including existing files)
	service = request.state.injector.get(IngestService)

	existing_documents = service.list_ingested_filenames()

	if len(existing_documents) + len(files) > 5:
	raise HTTPException(403, "File limit reached. Maximum 5 files allowed.")

	# Limit on number of files:
	if len(files) > 5:
	raise HTTPException(400, "File limit reached (maximum 5 files allowed)")

	# Limit on file extensions:
	allowed_extensions = {"pdf", "csv"}
	for file in files:
	if file.filename.lower().split(".")[-1] not in allowed_extensions:
	raise HTTPException(400, "Invalid file type. Please upload PDF or CSV files only.")

	service = request.state.injector.get(IngestService)
	ingested_documents = []
	for file in files:
	if file.filename is None:
	raise HTTPException(400, "No file name provided")
	ingested_documents.extend(service.ingest_bin_data(file.filename, file.file))

	return IngestResponse(object="list", model="private-gpt", data=ingested_documents)


	@ingest_router.get("/ingest/list", tags=["Ingestion"])
	def list_ingested(request: Request) -> IngestResponse:
	"""Lists already ingested Documents including their Document ID and metadata.

	Those IDs can be used to filter the context used to create responses
	in `/chat/completions`, `/completions`, and `/chunks` APIs.
	"""
	service = request.state.injector.get(IngestService)
	ingested_documents = service.list_ingested()
	return IngestResponse(object="list", model="private-gpt", data=ingested_documents)


	@ingest_router.delete("/ingest", tags=["Ingestion"])
	def delete_ingested(request: Request, file_names: List[str]) -> None:
	"""Deletes ingested Documents with the specified file names.

	Accepts a list of file names in the request body and deletes
	all documents associated with those file names.
	"""

	service = request.state.injector.get(IngestService)

	# Find documents to delete for each file name
	documents_to_delete = []
	for file_name in file_names:
	ingested_documents = service.list_ingested()
	docs_for_file = [doc.doc_id for doc in ingested_documents if doc.doc_metadata.get("file_name") == file_name]
	documents_to_delete.extend(docs_for_file)

	# Delete the documents
	for doc_id_to_delete in documents_to_delete:
	service.delete(doc_id_to_delete)

	return Response(status_code=204) # No content



	@ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
	def list_ingested(request: Request) -> List[str]:
	"""Lists already ingested Documents including their Document ID and metadata.

	Those IDs can be used to filter the context used to create responses
	in `/chat/completions`, `/completions`, and `/chunks` APIs.
	"""
	service = request.state.injector.get(IngestService)
	ingested_documents: List[IngestedDoc] = service.list_ingested()

	# Extract unique filenames
	unique_filenames = set(doc.doc_metadata.get("file_name", "") for doc in ingested_documents)
	unique_filenames_list = list(unique_filenames)

	return unique_filenames_list