Spaces:
Running
Running
| '''Some operations. To be split into separate files when we have more.''' | |
| from . import ops | |
| import matplotlib | |
| import networkx as nx | |
| import pandas as pd | |
def import_parquet(*, filename: str):
    '''Loads the parquet file at `filename` with pandas and returns the result.'''
    table = pd.read_parquet(filename)
    return table
def create_scale_free_graph(*, nodes: int = 10):
    '''Builds a scale-free graph with networkx.

    `nodes` is passed straight to `nx.scale_free_graph` as the node count.
    '''
    scale_free = nx.scale_free_graph(nodes)
    return scale_free
def compute_pagerank(graph: nx.Graph, *, damping=0.85, iterations=3):
    '''Runs networkx PageRank on `graph` and returns whatever it produces.

    `damping` is forwarded as `alpha` and `iterations` as `max_iter`.
    '''
    # NOTE(review): networkx documents that pagerank raises
    # PowerIterationFailedConvergence when it does not converge within
    # max_iter iterations. The default of 3 looks low -- confirm it is intended.
    pagerank_options = {'alpha': damping, 'max_iter': iterations}
    return nx.pagerank(graph, **pagerank_options)
def _map_color(value):
    '''Maps numeric values to viridis colors written as '#rrggbb' hex strings.

    The values are min-max normalized before the colormap lookup. When every
    value is identical the range is zero; all values then map to the low end
    of the colormap instead of producing NaN through a 0/0 division.

    Assumes `value` is a 1-D numeric pandas Series or numpy array (the caller
    in this file passes a DataFrame column) -- TODO confirm for other callers.

    Returns a list with one hex color string per input value.
    '''
    # matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9.
    # Prefer the colormap registry; fall back only for versions that predate it.
    try:
        cmap = matplotlib.colormaps['viridis']
    except AttributeError:
        cmap = matplotlib.cm.get_cmap('viridis')
    lowest = value.min()
    span = value.max() - lowest
    offset = value - lowest
    # Multiplying by 0.0 keeps the shape and yields floats, so the colormap does
    # not interpret the zeros as integer lookup-table indices.
    normalized = offset / span if span != 0 else offset * 0.0
    rgba = cmap(normalized)
    return [
        '#{:02x}{:02x}{:02x}'.format(int(r * 255), int(g * 255), int(b * 255))
        for r, g, b in rgba[:, :3]]
def visualize_graph(graph: ops.Bundle, *, color_nodes_by: 'node_attribute' = None):
    '''Turns the bundle's 'nodes' and 'edges' tables into a dict describing a graph.

    The result has the keys 'node_attributes' (sorted node column names),
    'attributes', 'options', 'nodes' and 'edges'. Node keys come from the 'id'
    column; edge keys are built from the 'source' and 'target' columns.

    If `color_nodes_by` is set, that node column is mapped to colors with
    `_map_color` and every node gets 'color' and 'size' attributes.
    '''
    node_table = graph.dfs['nodes'].copy()
    # Capture the column list before the helper 'color' column is added.
    attribute_names = sorted(node_table.columns)
    if color_nodes_by:
        node_table['color'] = _map_color(node_table[color_nodes_by])
    node_records = node_table.to_records()

    # Only one edge is kept per (source, target) pair.
    edge_table = graph.dfs['edges'].drop_duplicates(['source', 'target'])
    edge_records = edge_table.to_records()

    node_items = []
    for node in node_records:
        if color_nodes_by:
            node_attrs = {'color': node.color, 'size': 5}
        else:
            node_attrs = {}
        node_items.append({'key': str(node.id), 'attributes': node_attrs})

    edge_items = []
    for edge in edge_records:
        source = str(edge.source)
        target = str(edge.target)
        edge_items.append({
            'key': source + ' -> ' + target,
            'source': source,
            'target': target,
        })

    return {
        'node_attributes': attribute_names,
        'attributes': {},
        'options': {},
        'nodes': node_items,
        'edges': edge_items,
    }
def view_tables(bundle: ops.Bundle):
    '''Packs every DataFrame of the bundle into a plain dict for display.

    Each table contributes its column names (as strings) and its rows (as
    nested lists). The bundle's `relations` and `other` are passed through
    unchanged.
    '''
    tables = {}
    for name, df in bundle.dfs.items():
        column_names = list(map(str, df.columns))
        rows = df.values.tolist()
        tables[name] = {'columns': column_names, 'data': rows}
    return {
        'dataframes': tables,
        'relations': bundle.relations,
        'other': bundle.other,
    }