Spaces:

Ibraaheem
/

invenxion-chatbot

Sleeping

App Files Files Community

invenxion-chatbot / private_gpt /server /ingest /ingest_router.py

Ibraaheem

Update private_gpt/server/ingest/ingest_router.py

b96229e almost 2 years ago

raw

history blame

3.79 kB

	from typing import Literal, List

	from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
	from pydantic import BaseModel

	from private_gpt.server.ingest.ingest_service import IngestService
	from private_gpt.server.ingest.model import IngestedDoc
	#from private_gpt.server.utils.auth import authenticated
	from private_gpt.server.utils.authentication import get_current_user
	# Router for the ingestion endpoints. All routes are mounted under the "/v1"
	# prefix and declare `get_current_user` as a router-level dependency.
	ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(get_current_user)])


	class IngestResponse(BaseModel):
	    """Response body returned by the `/ingest` and `/ingest/list` endpoints.

	    Wraps a list of ingested documents together with two fixed marker fields.
	    """

	    # Payload kind marker; the only accepted value is "list".
	    object: Literal["list"]
	    # Model marker; the only accepted value is "private-gpt".
	    model: Literal["private-gpt"]
	    # The ingested documents carried by this response.
	    data: list[IngestedDoc]


	@ingest_router.post("/ingest", tags=["Ingestion"])
	def ingest(request: Request, file: UploadFile) -> IngestResponse:
	    """Ingest an uploaded file and store its chunks for later use as context.

	    Context built from ingested files is subsequently consumed by the
	    `/chat/completions`, `/completions`, and `/chunks` APIs.

	    Most common document formats are supported, but you may be prompted to
	    install an extra dependency to manage a specific file type.

	    One file may produce several Documents (a PDF, for example, yields one
	    Document per page). The response carries the ID of every Document created,
	    along with the extracted Metadata (which is later used to improve context
	    retrieval). Those IDs can be used to filter the context used to create
	    responses in the `/chat/completions`, `/completions`, and `/chunks` APIs.

	    Raises:
	        HTTPException: with status 400 when the upload carries no file name.
	    """
	    ingest_service = request.state.injector.get(IngestService)

	    uploaded_name = file.filename
	    if uploaded_name is None:
	        raise HTTPException(status_code=400, detail="No file name provided")

	    return IngestResponse(
	        object="list",
	        model="private-gpt",
	        data=ingest_service.ingest_bin_data(uploaded_name, file.file),
	    )


	@ingest_router.get("/ingest/list", tags=["Ingestion"])
	def list_ingested(request: Request) -> IngestResponse:
	    """Return every Document ingested so far, with its Document ID and metadata.

	    The returned IDs can be used to filter the context used to create
	    responses in the `/chat/completions`, `/completions`, and `/chunks` APIs.
	    """
	    ingest_service = request.state.injector.get(IngestService)
	    return IngestResponse(
	        object="list",
	        model="private-gpt",
	        data=ingest_service.list_ingested(),
	    )


	@ingest_router.delete("/ingest/{file_name}", tags=["Ingestion"])
	def delete_ingested(request: Request, file_name: str) -> None:
	    """Delete all ingested Documents with the specified file name.

	    The `file_name` can be obtained from the `GET /ingest/list` endpoint.
	    Every document whose `file_name` metadata equals the given value is
	    deleted through the ingest service; when nothing matches, no deletion
	    is performed.

	    Args:
	        request: Incoming request; its `state.injector` supplies the
	            `IngestService`.
	        file_name: Value compared against each document's `file_name`
	            metadata entry.
	    """
	    service = request.state.injector.get(IngestService)

	    # Resolve the matching doc_ids up front, before any deletion happens.
	    # NOTE(review): `doc_metadata` is assumed to be optional (None) on some
	    # documents - confirm against IngestedDoc. Such a document cannot match
	    # any file name, and must not abort the whole request.
	    doc_ids_to_delete = [
	        doc.doc_id
	        for doc in service.list_ingested()
	        if (doc.doc_metadata or {}).get("file_name") == file_name
	    ]

	    for doc_id in doc_ids_to_delete:
	        service.delete(doc_id)


	@ingest_router.get("/ingest/list_filenames", tags=["Ingestion"], response_model=List[str])
	def list_ingested(request: Request) -> List[str]:
	    """List the distinct file names of the already ingested Documents.

	    Each file name appears once, in the order it is first encountered in the
	    ingest service listing. A document without a `file_name` metadata entry
	    contributes an empty string. The names can be passed to
	    `DELETE /ingest/{file_name}`.

	    Args:
	        request: Incoming request; its `state.injector` supplies the
	            `IngestService`.

	    Returns:
	        The unique `file_name` metadata values of the ingested documents.
	    """
	    # NOTE(review): this function reuses the name `list_ingested`, which is
	    # also the handler name for `GET /ingest/list` earlier in this module, so
	    # the module-level name ends up bound to this function. The name is kept
	    # for compatibility; consider renaming it in a follow-up.
	    service = request.state.injector.get(IngestService)
	    ingested_documents: List[IngestedDoc] = service.list_ingested()

	    # NOTE(review): `doc_metadata` is assumed to be optional (None) on some
	    # documents - confirm against IngestedDoc. Treat it like missing metadata.
	    file_names = (
	        (doc.doc_metadata or {}).get("file_name", "") for doc in ingested_documents
	    )

	    # dict.fromkeys drops duplicates while keeping first-seen order, so the
	    # response order no longer depends on set hashing.
	    return list(dict.fromkeys(file_names))