Update settings.yaml
settings.yaml  (+110 −5)
@@ -1,9 +1,11 @@
+encoding_model: cl100k_base
+skip_workflows: []
 llm:
   api_key: "3bf18984-b4df-49ba-a30b-6cbae3964b08"
   type: openai_chat
   model_supports_json: true
-  model:
-  api_base:
+  model: claude-3-5-sonnet-20240620
+  api_base: http://localhost:8000/v1
   # max_tokens: 10000 # Adjusted based on Claude 3 Haiku's typical context window
   request_timeout: 30
   tokens_per_minute: 100000
@@ -11,9 +13,112 @@ llm:
   max_retry_wait: 5
   temperature: 0.1
 
-
+embeddings:
   async_mode: threaded
   llm:
+    api_key: "EMBEDDING_API_KEY"
     type: openai_embedding
-    model:
-    api_base:
+    model: mixedbread-ai/mxbai-embed-large-v1
+    api_base: http://localhost:7997
+
+chunks:
+  size: 1200
+  overlap: 100
+  group_by_columns: [id] # by default, we don't allow chunks to cross documents
+
+input:
+  type: file # or blob
+  file_type: text # or csv
+  base_dir: "input"
+  file_encoding: utf-8
+  file_pattern: ".*\\.txt$"
+
+cache:
+  type: file # or blob
+  base_dir: "cache"
+  # connection_string: <azure_blob_storage_connection_string>
+  # container_name: <azure_blob_storage_container_name>
+
+storage:
+  type: file # or blob
+  base_dir: "output/${timestamp}/artifacts"
+  # connection_string: <azure_blob_storage_connection_string>
+  # container_name: <azure_blob_storage_container_name>
+
+reporting:
+  type: file # or console, blob
+  base_dir: "output/${timestamp}/reports"
+  # connection_string: <azure_blob_storage_connection_string>
+  # container_name: <azure_blob_storage_container_name>
+
+entity_extraction:
+  ## llm: override the global llm settings for this task
+  ## parallelization: override the global parallelization settings for this task
+  ## async_mode: override the global async_mode settings for this task
+  prompt: "prompts/entity_extraction.txt"
+  entity_types: [organization,person,geo,event]
+  max_gleanings: 1
+
+summarize_descriptions:
+  ## llm: override the global llm settings for this task
+  ## parallelization: override the global parallelization settings for this task
+  ## async_mode: override the global async_mode settings for this task
+  prompt: "prompts/summarize_descriptions.txt"
+  max_length: 500
+
+claim_extraction:
+  ## llm: override the global llm settings for this task
+  ## parallelization: override the global parallelization settings for this task
+  ## async_mode: override the global async_mode settings for this task
+  # enabled: true
+  prompt: "prompts/claim_extraction.txt"
+  description: "Any claims or facts that could be relevant to information discovery."
+  max_gleanings: 1
+
+community_reports:
+  ## llm: override the global llm settings for this task
+  ## parallelization: override the global parallelization settings for this task
+  ## async_mode: override the global async_mode settings for this task
+  prompt: "prompts/community_report.txt"
+  max_length: 2000
+  max_input_length: 8000
+
+cluster_graph:
+  max_cluster_size: 10
+
+embed_graph:
+  enabled: false # if true, will generate node2vec embeddings for nodes
+  # num_walks: 10
+  # walk_length: 40
+  # window_size: 2
+  # iterations: 3
+  # random_seed: 597832
+
+umap:
+  enabled: false # if true, will generate UMAP embeddings for nodes
+
+snapshots:
+  graphml: false
+  raw_entities: false
+  top_level_nodes: false
+
+local_search:
+  # text_unit_prop: 0.5
+  # community_prop: 0.1
+  # conversation_history_max_turns: 5
+  # top_k_mapped_entities: 10
+  # top_k_relationships: 10
+  # llm_temperature: 0 # temperature for sampling
+  # llm_top_p: 1 # top-p sampling
+  # llm_n: 1 # Number of completions to generate
+  # max_tokens: 12000
+
+global_search:
+  # llm_temperature: 0 # temperature for sampling
+  # llm_top_p: 1 # top-p sampling
+  # llm_n: 1 # Number of completions to generate
+  # max_tokens: 12000
+  # data_max_tokens: 12000
+  # map_max_tokens: 1000
+  # reduce_max_tokens: 2000
+  # concurrency: 32
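The chat block now points GraphRAG's openai_chat provider at claude-3-5-sonnet-20240620 behind http://localhost:8000/v1, i.e. some OpenAI-compatible gateway (a LiteLLM-style proxy is one common choice; the diff does not say which). A minimal sketch for sanity-checking such an endpoint before indexing, assuming the openai Python client and that the gateway accepts the api_key configured above:

# Sanity-check sketch for the chat endpoint configured in settings.yaml.
# Assumes an OpenAI-compatible proxy on localhost:8000; the URL, model
# name, and key below simply mirror the values in the diff.
from openai import OpenAI

chat = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="3bf18984-b4df-49ba-a30b-6cbae3964b08",  # api_key from settings.yaml
)
resp = chat.chat.completions.create(
    model="claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=10,
)
print(resp.choices[0].message.content)

If this round-trips, GraphRAG's llm section should work with the same values.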
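Likewise, the embeddings block targets mixedbread-ai/mxbai-embed-large-v1 on http://localhost:7997. That port matches the default of the infinity embedding server, but that is an inference here, not something the diff states. A hedged check, assuming the server exposes an OpenAI-style /embeddings route:

# Sanity-check sketch for the embeddings endpoint in settings.yaml.
# The /embeddings route and bearer-token header are assumptions about the
# local server; only the URL and model name come from the diff above.
import requests

resp = requests.post(
    "http://localhost:7997/embeddings",
    json={"model": "mixedbread-ai/mxbai-embed-large-v1", "input": ["ping"]},
    headers={"Authorization": "Bearer EMBEDDING_API_KEY"},
    timeout=30,
)
resp.raise_for_status()
# mxbai-embed-large-v1 produces 1024-dimensional vectors
print(len(resp.json()["data"][0]["embedding"]))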
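The chunks and encoding_model settings together mean input text is split into 1200-token windows with a 100-token overlap, counted with the cl100k_base tokenizer. A rough sketch of that windowing, as an illustration of the settings rather than GraphRAG's actual chunker:

# Token-window chunking with the same parameters as settings.yaml:
# 1200-token windows, consecutive windows sharing 100 tokens.
import tiktoken

def chunk_text(text: str, size: int = 1200, overlap: int = 100) -> list[str]:
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    chunks = []
    # each new window starts (size - overlap) tokens after the previous one
    for start in range(0, max(len(tokens), 1), size - overlap):
        chunks.append(enc.decode(tokens[start:start + size]))
        if start + size >= len(tokens):
            break
    return chunks

With the file saved, indexing is typically launched from the directory containing settings.yaml so GraphRAG picks it up, e.g. python -m graphrag.index --root . with the 2024-era CLI.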