# GraphGen/graphgen/configs/aggregated_config.yaml
# Auto-sync from demo at Fri Nov 7 10:46:57 UTC 2025 (github-actions[bot], commit 283e483)
# File size: 1.18 kB
---
# Pipeline configuration for the "aggregated" generation method.
# `pipeline` is an ordered list of steps; each step has a `name`, optional
# `params`, and optional `deps` naming the steps it depends on.
pipeline:
  - name: read
    params:
      # Input file path; supports json, jsonl, txt, pdf.
      # See resources/input_examples for examples.
      input_file: resources/input_examples/jsonl_demo.jsonl

  - name: chunk
    params:
      # Chunk size for text splitting.
      chunk_size: 1024
      # Chunk overlap for text splitting.
      chunk_overlap: 100

  # This step is configured without any params.
  - name: build_kg

  - name: quiz_and_judge
    params:
      # Number of quiz samples to generate.
      quiz_samples: 2
      # Whether to re-judge the existing quiz samples.
      re_judge: false

  - name: partition
    # The ece method depends on the quiz_and_judge step.
    deps: [quiz_and_judge]
    params:
      # ece is a custom partition method based on comprehension loss.
      method: ece
      method_params:
        # Max nodes and edges per community.
        max_units_per_community: 20
        # Min nodes and edges per community.
        min_units_per_community: 5
        # Max tokens per community.
        max_tokens_per_community: 10240
        # Unit sampling strategy; supported: random, max_loss, min_loss.
        unit_sampling: max_loss

  - name: generate
    params:
      # Supported methods: atomic, aggregated, multi_hop, cot, vqa.
      method: aggregated
      # Supported formats: Alpaca, Sharegpt, ChatML.
      data_format: ChatML