File size: 2,803 Bytes
ca01fa3
 
b6d30cb
ca01fa3
b6d30cb
ca01fa3
 
 
 
 
 
 
9e91869
ca01fa3
 
 
 
0c44583
5882a26
0c44583
b6d30cb
 
942065e
 
 
 
 
 
b6d30cb
 
 
 
 
ca01fa3
b5a8a95
0c44583
b6d30cb
 
 
 
 
76e9e8e
b6d30cb
5882a26
b6d30cb
5882a26
b5a8a95
 
b6d30cb
b5a8a95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5882a26
b5a8a95
 
 
 
 
 
 
 
 
 
9e91869
b6d30cb
a18645a
0c44583
d994c06
a18645a
 
 
 
d994c06
 
 
a18645a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
'''Some operations. To be split into separate files when we have more.'''
from . import ops
import matplotlib
import networkx as nx
import pandas as pd

@ops.op("Import Parquet")
def import_parquet(*, filename: str):
  '''Reads the Parquet file at `filename` with pandas and returns the result.'''
  table = pd.read_parquet(filename)
  return table

@ops.op("Create scale-free graph")
def create_scale_free_graph(*, nodes: int = 10):
  '''Builds a graph with `nodes` nodes using networkx's scale_free_graph generator.'''
  generated = nx.scale_free_graph(nodes)
  return generated

@ops.op("Compute PageRank")
@ops.nx_node_attribute_func('pagerank')
def compute_pagerank(graph: nx.Graph, *, damping=0.85, iterations=100):
  '''Runs networkx PageRank on the graph and returns its result.

  `damping` is forwarded as networkx's `alpha` and `iterations` as its `max_iter`.
  '''
  pagerank_options = {'alpha': damping, 'max_iter': iterations}
  return nx.pagerank(graph, **pagerank_options)


@ops.op("Sample graph")
def create_scale_free_graph(*, nodes: int = 10):
  '''Returns a scale-free graph with `nodes` nodes, generated by networkx.'''
  # NOTE(review): this function has the same name and body as the
  # "Create scale-free graph" op defined earlier in this file, so it rebinds
  # that module-level name. Confirm whether a distinct sampling op was intended.
  sample = nx.scale_free_graph(nodes)
  return sample


def _map_color(value):
  '''Maps numeric values to hex color strings using the viridis colormap.

  The values are min-max normalized before the colormap lookup. `value` must
  support `.min()`, `.max()` and elementwise arithmetic (the caller in this file
  passes a dataframe column). Returns a list of '#rrggbb' strings, one per value.
  If all values are equal, every value maps to the colormap's lowest color.
  '''
  try:
    cmap = matplotlib.colormaps['viridis']
  except AttributeError:
    # Older matplotlib releases have no `colormaps` registry; newer ones have
    # removed `matplotlib.cm.get_cmap`, so try the registry first.
    cmap = matplotlib.cm.get_cmap('viridis')
  lowest = value.min()
  span = value.max() - lowest
  # For constant input the span is 0; dividing by it would turn every value into
  # NaN. Divide by 1 instead so the normalized values are all 0.
  scale = span if span != 0 else 1
  normalized = (value - lowest) / scale
  rgba = cmap(normalized)
  return ['#{:02x}{:02x}{:02x}'.format(int(r*255), int(g*255), int(b*255)) for r, g, b in rgba[:, :3]]

@ops.op("Visualize graph", view="visualization")
def visualize_graph(graph: ops.Bundle, *, color_nodes_by: 'node_attribute' = None):
  '''Builds a chart configuration dict that draws the graph with a spring layout.

  Reads the 'nodes' and 'edges' dataframes from the bundle. When `color_nodes_by`
  is set, each node is colored by that column of the nodes dataframe. Edges that
  repeat the same ('source', 'target') pair are emitted only once.

  Returns a dict with a single 'graph' series holding node positions and links.
  NOTE(review): the dict layout looks like an ECharts option -- confirm against
  the "visualization" view that consumes it.
  '''
  nodes = graph.dfs['nodes'].copy()
  if color_nodes_by:
    nodes['color'] = _map_color(nodes[color_nodes_by])
  nodes = nodes.to_records()
  edges = graph.dfs['edges'].drop_duplicates(['source', 'target'])
  edges = edges.to_records()
  # Clamp to 1 so a bundle without nodes does not divide by zero below.
  node_count = max(1, len(nodes))
  # Fewer layout iterations for bigger graphs, but always at least one.
  pos = nx.spring_layout(graph.to_nx(), iterations=max(1, int(10000 / node_count)))
  # Adjust node size to cover the same area no matter how many nodes there are.
  symbol_size = 50 / node_count ** 0.5
  v = {
    'animationDuration': 500,
    'animationEasingUpdate': 'quinticInOut',
    'series': [
      {
        'type': 'graph',
        'roam': True,
        'lineStyle': {
          'color': 'gray',
          'curveness': 0.3,
        },
        'emphasis': {
          'focus': 'adjacency',
          'lineStyle': {
            'width': 10,
          }
        },
        'data': [
          {
            'id': str(n.id),
            'x': float(pos[n.id][0]), 'y': float(pos[n.id][1]),
            'symbolSize': symbol_size,
            'itemStyle': {'color': n.color} if color_nodes_by else {},
          }
          for n in nodes],
        'links': [
          {'source': str(r.source), 'target': str(r.target)}
          for r in edges],
      },
    ],
  }
  return v

@ops.op("View tables", view="table_view")
def view_tables(bundle: ops.Bundle):
  '''Packs the bundle's dataframes, relations and other data into one dict.

  Each dataframe becomes a dict with its column names (as strings) under
  'columns' and its rows (as nested lists) under 'data'.
  '''
  dataframes = {}
  for name, df in bundle.dfs.items():
    dataframes[name] = {
      'columns': list(map(str, df.columns)),
      'data': df.values.tolist(),
    }
  return {
    'dataframes': dataframes,
    'relations': bundle.relations,
    'other': bundle.other,
  }