In [1]:
from src.benchmarks.get_qa_dataset import get_qa_dataset
from src.benchmarks.get_semistruct import get_semistructured_data

# Name of the benchmark to explore; the same name is passed to both loaders below.
dataset_name = 'amazon'

# Load the question/answer pairs first, then the semi-structured knowledge base.
# Later cells read both objects through the names `qa_dataset` and `kb`.
qa_dataset = get_qa_dataset(dataset_name)
kb = get_semistructured_data(dataset_name)

  from .autonotebook import tqdm as notebook_tqdm


loading dataset from external data
Load cached graph with meta link types ['brand']


### Load QA dataset

In [2]:
# Fetch a single QA example. An item unpacks into (query text, query id,
# answer node ids, metadata); the metadata slot is masked out to avoid leaking
# the answer, so it is discarded here.
query, q_id, answer_ids, _ = qa_dataset[1]
print('Query:', query)
print('Query ID:', q_id)
# Print one answer title per line. Passing the titles as separate arguments with
# sep='\n' avoids the stray space that print('Answer:\n', text) inserts before
# the first title (print joins its arguments with a single space by default).
print('Answer:', *(kb[aid].title for aid in answer_ids), sep='\n')

Query: Looking for a user-friendly fly fishing knot guide with clear, easy-to-understand illustrations. Ideally, it should be logically organised for easy learning and effective in teaching dependable knot tying techniques. It would be a bonus if it complements the Anglers Accessories Gehrke's Gink that I frequently use. Any recommendations?
Query ID: 30
Answer:
 Lake Products THREE-in-One Knot Tying Tool Fly Fishing
EZ Tie Blood Knot Tying Tool
BenchMaster Pocket Guide - Fly Fishing - Fishing


In [3]:
# The dataset ships with an official random train / validation / test split.
# Report the size of each subset, in that order.
for split_key, split_label in (('train', 'training'), ('val', 'validation'), ('test', 'test')):
    print(f'Number of {split_label} examples:', len(qa_dataset.get_subset(split_key)))

# The split is also available as index collections keyed by split name.
qa_dataset.get_idx_split()

Number of training examples: 5910
Number of validation examples: 1548
Number of test examples: 1642


{'train': tensor([3885, 4522, 2110,  ..., 6839, 3967, 2814]),
 'val': tensor([1550, 1486, 6591,  ..., 5606, 1204, 3792]),
 'test': tensor([2905, 3863, 4651,  ..., 3891, 7631, 4472])}

### Load Knowledge Base

In [4]:
# Part of the knowledge base schema, shown as
# (node type, relation type, node type) triples.
schema_triples = kb.get_tuples()
schema_triples

[('brand', 'has_brand', 'product'),
 ('product', 'also_buy', 'product'),
 ('product', 'also_view', 'product'),
 ('product', 'has_brand', 'brand')]

In [5]:
# The node types and relation types can be listed separately as well.
node_types = kb.node_type_lst()
relation_types = kb.rel_type_lst()
(node_types, relation_types)

(['product', 'brand'], ['also_buy', 'also_view', 'has_brand'])

In [6]:
# Overall graph size: query and print the node count, then the edge count.
for element_label, count_fn in (('nodes', kb.num_nodes), ('edges', kb.num_edges)):
    print(f'Number of {element_label}:', count_fn())

Number of nodes: 1032407
Number of edges: 6455692


In [7]:
# Attributes that appear in each node type's textual information; they are
# treated as part of the schema. The lists are indicative only: an individual
# node may lack some of these attributes or carry additional ones.
attrs_by_node_type = kb.node_attr_dict
attrs_by_node_type

{'product': ['title',
  'dimensions',
  'weight',
  'description',
  'features',
  'reviews',
  'Q&A'],
 'brand': ['brand_name']}

In [8]:
# Every node carries a textual document; show the one for the first answer node.
# NOTE(review): add_rel=False presumably leaves relation information out of
# the text — confirm against get_doc_info.
first_answer_id = answer_ids[0]
doc_text = kb.get_doc_info(first_answer_id, add_rel=False)
print(doc_text)

- product: Lake Products THREE-in-One Knot Tying Tool Fly Fishing
- brand: Lake
- description: NEW & IMPROVED - Replaces the Two-in-One Knot Tying Tool - still ties many over 14 different knots, but now adds a magnetic hook threader; made of Delron and stainless steel; instruction book included.Precision machined contact firmly grips any fishing line, without causing damageUp and down spring action with stainless steel springThe body is manufactured of strong, lightweight Acetel Delrin for years of reliable serviceStainless Steel Shaft, head and loop will not rust or corrodeAttachment loop to clip onto clothing
- features: 
#1: Precision machined contact firmly grips any fishing line, without causing damage
#2: Up and down spring action with stainless steel spring
#3: The body is manufactured of strong, lightweight Acetel Delrin for years of reliable service
#4: Stainless Steel Shaft, head and loop will not rust or corrode
#5: Attachment loop to clip onto clothing
- reviews: 
#9:
summa

In [9]:
# Nodes are linked to other nodes; collect the neighbors of the first answer node.
# NOTE(review): edge_type='*' looks like a wildcard matching every edge type — confirm.
center_node_id = answer_ids[0]
neighbor_lst = kb.get_neighbor_nodes(center_node_id, edge_type='*')
neighbor_count = len(neighbor_lst)
print('The neighbors of the answer node are:', neighbor_count)

The neighbors of the answer node are: 222


In [10]:
# Tally how many neighbors there are of each node type.
from collections import Counter

neighbor_types = list(map(kb.get_node_type_by_id, neighbor_lst))
type_counts = Counter(neighbor_types)
print(type_counts)

Counter({'product': 221, 'brand': 1})


### Take PrimeKG as another example

In [11]:
# Switch to the PrimeKG benchmark. This rebinds `dataset_name`, `qa_dataset`
# and `kb`, so the cells after this one operate on PrimeKG rather than on the
# Amazon data loaded at the top of the notebook.
dataset_name = 'primekg'

# Same loading order as before: QA pairs first, then the knowledge base.
qa_dataset = get_qa_dataset(dataset_name)
kb = get_semistructured_data(dataset_name)

loading dataset from external data
Loaded from data/primekg/processed!


In [12]:
# Show the raw item at index 1: a (query, query id, answer node ids, metadata) tuple.
primekg_example = qa_dataset[1]
primekg_example

('What drugs target the CYP3A4 enzyme and are used to treat strongyloidiasis?',
 1,
 [15450],
 None)

In [13]:
# 15450 is the answer node id of the PrimeKG example query; print its textual document.
# NOTE(review): add_rel=False presumably leaves relation information out of
# the text — confirm against get_doc_info.
drug_node_id = 15450
drug_doc_text = kb.get_doc_info(drug_node_id, add_rel=False)
print(drug_doc_text)

- name: Ivermectin
- type: drug
- source: DrugBank
- details:
  - description: Ivermectin is a broad-spectrum anti-parasite medication. It was first marketed under the name Stromectol® and used against worms (except tapeworms), but, in 2012, it was approved for the topical treatment of head lice infestations in patients 6 months of age and older, and marketed under the name Sklice™ as well. Ivermectin is mainly used in humans in the treatment of onchocerciasis, but is also effective against other worm infestations (such as strongyloidiasis, ascariasis, trichuriasis and enterobiasis).
  - half_life: 16 hours (also reported at 22-28 hours)
  - indication: For the treatment of intestinal (i.e., nondisseminated) strongyloidiasis due to the nematode parasite <i>Strongyloides stercoralis</i>. Also for the treatment of onchocerciasis (river blindness) due to the nematode parasite <i>Onchocerca volvulus</i>. Can be used to treat scabies caused by <i>Sarcoptes scabiei</i>.
  - mechanism_of_acti

In [14]:
# Print the relation summary of the same answer node (id 15450).
drug_node_id = 15450
drug_rel_text = kb.get_rel_info(drug_node_id)
print(drug_rel_text)

- relations:

  enzyme: {gene/protein: (CYP3A4),}
  target: {gene/protein: (GABRB3, GLRA3),}
  transporter: {gene/protein: (ABCC2, ABCG2, ABCC1, ABCB1, SLCO1B1, SLCO1B3),}
  contraindication: {disease: (filariasis, loiasis),}
  indication: {disease: (onchocerciasis, strongyloidiasis),}
  synergistic_interaction: {drug: (Beclomethasone dipropionate, Betamethasone, Triamcinolone, Diethylstilbestrol, Liothyronine, Liotrix, Genistein, Ubidecarenone, Torasemide, Nelfinavir, Lovastatin, Ziprasidone, Phenytoin, Metoprolol, Dicoumarol, Conjugated estrogens, Etonogestrel, Desogestrel, Gefitinib, Meperidine, Duloxetine, Chlorpromazine, Raloxifene, Zidovudine, Ritonavir, Erlotinib, Ciprofloxacin, Nortriptyline, Methotrexate, Cephalexin, Clonidine, Enalapril, Medroxyprogesterone acetate, Chloroquine, Imatinib, Testosterone, Stavudine, Estrone, Tamoxifen, Warfarin, Lamivudine, Norethisterone, Irinotecan, Estradiol, Propofol, Clofazimine, Terbinafine, Tacrolimus, Quinidine, Repaglinide, Salmeterol, 