File size: 1,876 Bytes
8e29341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
776fff9
 
8e29341
776fff9
8e29341
 
 
 
92db39d
8e29341
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os

from chromadb.config import Settings
from langchain_community.document_loaders import (
    CSVLoader,
    Docx2txtLoader,
    JSONLoader,
    TextLoader,
    UnstructuredExcelLoader,
    UnstructuredFileLoader,
    UnstructuredHTMLLoader,
    UnstructuredMarkdownLoader,
    UnstructuredWordDocumentLoader,
)

from modules import app_logger

# Rebind the name `app_logger` from the imported `modules.app_logger` module
# to the `app_logger` attribute that module exposes (presumably the shared
# logger object — confirm in modules/app_logger.py). After this line the
# module object itself is no longer reachable under this name.
app_logger = app_logger.app_logger
# Settings read from the process environment; each lookup falls back to a
# hard-coded default when the variable is not set.

# OpenAI credential. The fallback is the literal string "NONE", not None.
openai_api_key = os.getenv("OPENAI_API_KEY", "NONE")

# Disabled setting, kept for reference:
# mongodb_uri = os.environ.get("MONGODB_URI", "mongodb://localhost:27017")

# URI read from LOCAL_OPENAI_URI (presumably a locally hosted
# OpenAI-compatible endpoint — confirm with the code that consumes it).
local_model_uri = os.getenv(
    "LOCAL_OPENAI_URI",
    "http://localhost:8000/v1",
)
# Maps a file extension (lower-case, including the leading dot) to the
# LangChain document-loader class used for files of that type. The values
# are classes, not instances; constructing a loader is left to whichever
# code looks the extension up.
DOCUMENT_MAP = {
    # Markup, plain text and source code
    ".html": UnstructuredHTMLLoader,
    ".txt": TextLoader,
    ".md": UnstructuredMarkdownLoader,
    ".py": TextLoader,
    # NOTE(review): JSONLoader's constructor also expects a `jq_schema`
    # (and `json_lines=True` for .jsonl) — confirm the caller supplies them.
    ".json": JSONLoader,
    ".jsonl": JSONLoader,
    # Generic Unstructured loader for PDFs
    ".pdf": UnstructuredFileLoader,
    # Tabular data
    ".csv": CSVLoader,
    ".xls": UnstructuredExcelLoader,
    ".xlsx": UnstructuredExcelLoader,
    # Word documents
    ".docx": Docx2txtLoader,
    # Legacy binary .doc files are not zip archives, so the docx2txt-based
    # Docx2txtLoader cannot open them. The Unstructured Word loader is
    # documented to handle both .doc and .docx.
    ".doc": UnstructuredWordDocumentLoader,
}
# Filesystem locations, written as paths relative to the working directory.
WORKSPACE_DIRECTORY = "./workspace/"
MODELS_PATH = "./models"

# Default model identifiers.
MODEL_NAME = "gpt-3.5-turbo"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"


# Chroma configuration object: persistence switched on, anonymized
# telemetry switched off.
CHROMA_SETTINGS = Settings(is_persistent=True, anonymized_telemetry=False)
# --- Ingestion -------------------------------------------------------------
# Worker count: the CPU count reported by the OS, or 8 when it cannot be
# determined (os.cpu_count() returns None in that case).
INGEST_THREADS = os.cpu_count() or 8
CHUNK_SIZE = 880
CHUNK_OVERLAP = 200
PROCESSED_DOCS = "index_processed.log"
MAX_FILE_SIZE = 10  # not implemented yet

# --- Retrieval / chat ------------------------------------------------------
SEARCH_COUNT = 5
MESSAGE_HISTORY = 5
RAG_K = 5
RAG_TECHNIQUE = "refine"
SUMMARIZER_BATCH = 4
# "db/" sub-directory under the workspace directory. WORKSPACE_DIRECTORY is
# appended to as-is, so it must already end with a path separator.
LOCAL_PERSISTANT_DB = f"{WORKSPACE_DIRECTORY}db/"
# Content categories, in this fixed order.
CONTENT_TYPE = [
    "Policies",
    "Playbooks",
    "Standards",
    "Reference Docs",
]
SYSTEM_CONTENT_DATA = "app-content.json"
SYSTEM_DEPLOYMENT_MODE = 0
# Demo endpoint URL; not enabled yet.
ZYSEC_DEMO = "http://zysec.is-a-geek.com:8000/v1"