Source code for causal_networkx.io

import networkx as nx
import numpy as np

from causal_networkx import ADMG, CPDAG, DAG, PAG
from causal_networkx.config import (
    EDGE_TO_VALUE_MAPPING,
    ENDPOINT_TO_EDGE_MAPPING,
    GRAPH_TYPES,
    VALUE_TO_MIXED_EDGE_MAPPING,
    EdgeType,
    EndPoint,
)

GRAPH_TYPE = {DAG: "DAG", ADMG: "ADMG", CPDAG: "CPDAG", PAG: "PAG"}
GRAPH_TYPE_TO_FUNC = {val: key for key, val in GRAPH_TYPE.items()}


[docs]def load_from_networkx(G: nx.Graph): """Load causal graph from networkx. Parameters ---------- G : nx.DiGraph | nx.MultiDiGraph The networkx graph, which contains multiple edges if there are edge attributes needed for each edge between nodes. The edge attributes encode which type of edge it is. See Notes. Returns ------- graph : instance of causal DAG The causal graph. Notes ----- Networkx does not support mixed edge graphs implicitly. However, they do support edge attributes. A `networkx.DiGraph` encodes a normal causal :class:`causal_networkx.DAG`, while a `networkx.MultiDiGraph` encodes all other causal graphs, such as :class:`causal_networkx.CPDAG`, :class:`causal_networkx.PAG`, :class:`causal_networkx.ADMG` by storing the different edges as edge attributes in the keyword "type". Moreover, the graph type is stored in the "graph_type" networkx graph attribute. """ graph_func = GRAPH_TYPE_TO_FUNC[G.graph["graph_type"]] name = G.name graph = graph_func() graph.name = name # add all nodes to the causal graph graph.add_nodes_from(G.nodes) # now add all edges for u, v, edge_attrs in G.edges.data(): edge_type = edge_attrs["type"] # replace edge marks with their appropriate string representation if edge_type == EdgeType.directed.value: graph.add_edge(u, v) elif edge_type == EdgeType.undirected.value: graph.add_undirected_edge(u, v) elif edge_type == EdgeType.bidirected.value: graph.add_bidirected_edge(u, v) elif edge_type == EndPoint.circle.value: graph.add_circle_endpoint(u, v) return graph
[docs]def to_networkx(causal_graph: DAG): """Convert causal graph to networkx class. Parameters ---------- causal_graph : DAG A causal graph. Returns ------- G : nx.MultiDiGraph The networkx directed graph with multiple edges with edge attributes indicating via the keyword "type", which type of causal edge it is. """ if len(causal_graph._graphs) == 1: G = nx.DiGraph() else: G = nx.MultiDiGraph() # preserve the name G.name = causal_graph.name graph_type = type(causal_graph).__name__ # GRAPH_TYPE[type(causal_graph)] G.graph["graph_type"] = graph_type # add all nodes to the networkx graph G.add_nodes_from(causal_graph.nodes) # add all the edges for name, graph in zip(causal_graph._graph_names, causal_graph._graphs): # replace edge marks with their appropriate string representation if name == EdgeType.directed.value: attr = {"type": EdgeType.directed.value} elif name == EdgeType.undirected.value: attr = {"type": EdgeType.undirected.value} elif name == EdgeType.bidirected.value: attr = {"type": EdgeType.bidirected.value} elif name == EndPoint.circle.value: attr = {"type": EndPoint.circle.value} G.add_edges_from(graph.edges, **attr) return G
def load_from_dot(graph, dagitty: bool = False): """Load causal graph from pyDot graph. Parameters ---------- graph : _type_ _description_ dagitty : bool, optional _description_, by default False Returns ------- _type_ _description_ """ # multiple edges are not allowed assert graph.get_strict(None) if dagitty: assert graph.get_type() == "dag" else: assert graph.get_type() == "digraph" # now read the graph N = DAG() # assign name of the graph name = graph.get_name().strip('"') if name != "": N.name = name # add nodes and attributes for p in graph.get_node_list(): n = p.get_name().strip('"') if n in ("node", "graph", "edge"): continue N.add_node(n, **p.get_attributes()) # add edges for e in graph.get_edge_list(): u = e.get_source() v = e.get_destination() attr = e.get_attributes() s = [] d = [] if isinstance(u, str): s.append(u.strip('"')) else: for unodes in u["nodes"]: s.append(unodes.strip('"')) if isinstance(v, str): d.append(v.strip('"')) else: for vnodes in v["nodes"]: d.append(vnodes.strip('"')) for source_node in s: for destination_node in d: N.add_edge(source_node, destination_node, **attr) # add default attributes pattr = graph.get_attributes() if pattr: N.dag["graph"] = pattr try: N.dag["node"] = graph.get_node_defaults()[0] except (IndexError, TypeError): pass # N.graph['node']={} try: N.dag["edge"] = graph.get_edge_defaults()[0] except (IndexError, TypeError): pass # N.graph['edge']={} return N
[docs]def load_from_pgmpy(pgmpy_dag) -> DAG: """Load causal graph from pgmpy. Parameters ---------- pgmpy_dag : pgmpy.models.BayesianNetwork The Bayesian network from PGMPY. Returns ------- dag : DAG The causal Bayesian Network. """ adjmat_df = pgmpy_dag["adjmat"] # create the causal DAG digraph = nx.from_pandas_adjacency(adjmat_df, create_using=nx.DiGraph) dag = DAG(digraph) return dag
[docs]def load_from_numpy(arr, type="dag"): """Load causal graph from a numpy array. # TODO: add sparse support. Parameters ---------- arr : np.ndarray of shape (n_nodes, n_nodes) A numpy array specifying the connections between nodes, where the ijth component specifies the edge from i to j. type : str, optional The type of causal graph, by default 'dag'. Must be one of ``('dag', ``cpdag``, ``admg``, ``pag``)``. For mixed-edge graphs, the ``arr`` specified will have specific values mapped to specific edges. Returns ------- G : instance of DAG | CPDAG An instance of a causal graph. Notes ----- Numpy support for ADMGs are not supported yet, as nodes can have two edges between any two nodes (i.e. a directed edge and a bidirected edge). """ n, m = arr.shape if n != m: raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={arr.shape}") if type not in GRAPH_TYPES: raise ValueError( f'"type" needs to be one of accepted graph types {GRAPH_TYPES}, not {type}.' ) if type == "dag": nx_graph = nx.from_numpy_array(arr, create_using=nx.DiGraph) G = DAG(nx_graph) elif type == "cpdag": G = CPDAG() # Make sure we get even the isolated nodes of the graph. G.add_nodes_from(range(n)) # Get a list of all the entries in the array with nonzero entries. These # coordinates become edges in the graph. (convert to int from np.int64) for e in zip(*arr.nonzero()): idx = e[0] jdx = e[1] # get the endpoint value for the ijth connection endpoint_ij = VALUE_TO_MIXED_EDGE_MAPPING.get(arr[idx, jdx]) # check the other endpoint for jith endpoint_ji = VALUE_TO_MIXED_EDGE_MAPPING.get(arr[jdx, idx]) # now map these endpoints to edges that are added to the graph edge_type = ENDPOINT_TO_EDGE_MAPPING[(endpoint_ij, endpoint_ji)] if edge_type == EdgeType.directed.value: # just add directed edge from i to j G.add_edge(idx, jdx) elif edge_type == EdgeType.undirected.value: G.add_undirected_edge(idx, jdx) return G
[docs]def to_numpy(causal_graph): """Convert causal graph to a numpy adjacency array. Parameters ---------- causal_graph : instance of DAG The causal graph. Returns ------- numpy_graph : np.ndarray of shape (n_nodes, n_nodes) The numpy array that represents the graph. The values representing edges are mapped according to a pre-defined set of values. See Notes. Notes ----- The adjacency matrix is defined where the ijth entry of ``numpy_graph`` has a non-zero entry if there is an edge from i to j. The ijth entry is symmetric with the jith entry if the edge is 'undirected', or 'bidirected'. Then specific edges are mapped to the following values: - directed edge (->): 1 - undirected edge (--): 2 - bidirected edge (<->): 3 - circle endpoint (-o): 4 Circle endpoints can be symmetric, but they can also contain a tail, or a directed edge at the other end. """ if isinstance(causal_graph, ADMG): raise RuntimeError("Converting ADMG to numpy format is not supported.") # master list of nodes is in the internal dag node_list = causal_graph.nodes n_nodes = len(node_list) numpy_graph = np.zeros((n_nodes, n_nodes)) bidirected_graph_arr = None graph_map = dict() for name, graph in zip(causal_graph._graph_names, causal_graph._graphs): # make sure all nodes are in the internal graph if any(node not in graph for node in node_list): graph = graph.copy() graph.add_nodes_from(node_list) # handle bidirected edge separately if name == EdgeType.bidirected.value: bidirected_graph_arr = nx.to_numpy_array(graph, nodelist=node_list) continue # convert internal graph to a numpy array graph_arr = nx.to_numpy_array(graph, nodelist=node_list) graph_map[name] = graph_arr # ADMGs can have two edges between any 2 nodes if type(causal_graph).__name__ == "ADMG": # we handle this case separately from the other graphs assert len(graph_map) == 1 # set all bidirected edges with value 10 bidirected_graph_arr[bidirected_graph_arr != 0] = 10 numpy_graph += bidirected_graph_arr numpy_graph += graph_arr else: # map each edge to an edge value for name, graph_arr in graph_map.items(): graph_arr[graph_arr != 0] = EDGE_TO_VALUE_MAPPING[name] numpy_graph += graph_arr # bidirected case is handled separately if bidirected_graph_arr is not None: numpy_graph += bidirected_graph_arr return numpy_graph
def read_dot(fname: str): """Read DOT graph from file.""" import pydot if fname.endswith(".dot"): graph = pydot.graph_from_dot_file(fname) elif fname.endswith(".txt"): # read txt file with open(fname, "r") as f: graph = f.readlines() graph = "".join(graph) graph = pydot.graph_from_dot_data(graph) assert len(graph) == 1 graph = graph[0] nx_graph = nx.drawing.nx_pydot.from_pydot(graph) dag = DAG(nx_graph) return dag def read_gml(fname): """Read .gml file from disc to causal-networkx graph.""" nx_graph = nx.read_gml(fname) graph = load_from_networkx(nx_graph) return graph