# File size: 1,876 Bytes
# 8e29341 776fff9 8e29341 776fff9 8e29341 92db39d 8e29341
import os
from langchain_community.document_loaders import (CSVLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader,
UnstructuredFileLoader, UnstructuredMarkdownLoader, UnstructuredHTMLLoader, JSONLoader)
from chromadb.config import Settings
from modules import app_logger
app_logger = app_logger.app_logger
# Settings read from the process environment when this module is imported.

# OpenAI API key; the literal string "NONE" is used when OPENAI_API_KEY is unset.
openai_api_key: str = os.getenv("OPENAI_API_KEY", "NONE")

# Base URI of the local OpenAI-compatible endpoint, with a localhost default.
# (A MONGODB_URI lookup and a None default for this URI existed as
# commented-out alternatives and remain disabled.)
local_model_uri: str = os.getenv("LOCAL_OPENAI_URI", "http://localhost:8000/v1")
# Maps a file extension (lowercase, including the leading dot) to the
# LangChain document-loader class used for files of that type.
# NOTE(review): JSONLoader's constructor also takes a `jq_schema` argument;
# confirm that callers instantiating loaders from this map supply it.
DOCUMENT_MAP = {
    ".html": UnstructuredHTMLLoader,
    ".txt": TextLoader,
    ".md": UnstructuredMarkdownLoader,
    ".py": TextLoader,
    ".json": JSONLoader,
    ".jsonl": JSONLoader,
    ".pdf": UnstructuredFileLoader,
    ".csv": CSVLoader,
    ".xls": UnstructuredExcelLoader,
    ".xlsx": UnstructuredExcelLoader,
    ".docx": Docx2txtLoader,
    # Legacy binary .doc files are not zip-based OOXML, so the docx2txt-backed
    # loader cannot open them; use the format-detecting unstructured loader.
    ".doc": UnstructuredFileLoader,
}
# Model-related defaults. How each value is consumed is not visible in this
# module; the names suggest a model directory, an embedding model id and a
# chat model id.
MODELS_PATH: str = "./models"
EMBEDDING_MODEL_NAME: str = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_NAME: str = "gpt-3.5-turbo"

# Root of the working directory; kept with its trailing slash because other
# paths are built from it by plain string concatenation.
WORKSPACE_DIRECTORY: str = "./workspace/"
# Shared chromadb Settings object: persistence flag on, anonymized
# telemetry flag off.
CHROMA_SETTINGS = Settings(
    is_persistent=True,
    anonymized_telemetry=False,
)
# Thread count named for ingestion: the CPU count reported by the OS, or 8
# when os.cpu_count() cannot determine it (returns None).
INGEST_THREADS: int = os.cpu_count() or 8

# Chunking parameters (units are not stated here; presumably characters --
# confirm against the text splitter that consumes them).
CHUNK_SIZE: int = 880
CHUNK_OVERLAP: int = 200

# File name of the log that tracks already-processed documents.
PROCESSED_DOCS: str = "index_processed.log"

# Retrieval / conversation tuning knobs.
SEARCH_COUNT: int = 5
MESSAGE_HISTORY: int = 5
RAG_K: int = 5
RAG_TECHNIQUE: str = "refine"
SUMMARIZER_BATCH: int = 4

# Marked as not implemented in the original note; units unspecified.
MAX_FILE_SIZE: int = 10
# Database directory located directly under the workspace root; relies on
# WORKSPACE_DIRECTORY already ending in a slash.
LOCAL_PERSISTANT_DB = f"{WORKSPACE_DIRECTORY}db/"

# Content categories, in their original order.
CONTENT_TYPE = [
    "Policies",
    "Playbooks",
    "Standards",
    "Reference Docs",
]

# File name of the system content data (JSON).
SYSTEM_CONTENT_DATA = "app-content.json"

# Deployment mode selector; the meaning of each value is defined by consumers
# outside this module.
SYSTEM_DEPLOYMENT_MODE = 0

# Demo endpoint; not enabled yet.
ZYSEC_DEMO = "http://zysec.is-a-geek.com:8000/v1"