[1]:

%matplotlib inline
import networkx as nx
from networkx.drawing.nx_pydot import graphviz_layout
from networkx.drawing.layout import planar_layout
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import pandas as pd
import numpy as np

Graph concepts¶

elements¶

graph
vertex
edge
path

graph properties¶

directed or undirected
weighted or unweighted
cyclic or acyclic
single or multiple edges
connected or disconnected

vertex properties¶

in-degree
out-degree
centrality

edge properties¶

direction
weight
multiplicity

Graph representations¶

Vertex and edge collections
Adjacency list
Adjacency matrix
Sparse matrix

[2]:

%matplotlib inline
import networkx as nx
from pprint import pprint

[3]:

vs = ['a', 'b', 'c', 'd']
es = [('a','c', 1), ('b','c', 2), ('a','d', 3), ('c', 'd', 4)]

g1 = nx.DiGraph()
g1.add_nodes_from(vs)
g1.add_weighted_edges_from(es)

pos = nx.spring_layout(g1)
labels = nx.get_edge_attributes(g1,'weight')
nx.draw(g1, pos=pos, alpha=0.5)
nx.draw_networkx_edge_labels(g1, pos=pos, edge_labels=labels)
nx.draw_networkx_labels(g1, pos=pos)
pass

../_images/notebooks_A12A_Graph_Algorithms_5_0.png

[4]:

import sys

[5]:

nx.write_adjlist(g1, sys.stdout)

#/usr/local/lib/python3.8/site-packages/ipykernel_launcher.py -f /Users/cliburnchan/Library/Jupyter/runtime/kernel-cbc7a826-65d9-4be6-993f-cecdb1c227ba.json
# GMT Thu Nov 12 00:34:55 2020
#
a c d
b c
c d
d

[6]:

print(nx.adj_matrix(g1))

  (0, 2)        1
  (0, 3)        3
  (1, 2)        2
  (2, 3)        4

[7]:

nx.adj_matrix(g1).todense()

[7]:

matrix([[0, 0, 1, 3],
        [0, 0, 2, 0],
        [0, 0, 0, 4],
        [0, 0, 0, 0]])

[8]:

#### Standard format for graph exchange is GraphML

[9]:

nx.write_gml(g1, sys.stdout)

graph [
  directed 1
  node [
    id 0
    label "a"
  ]
  node [
    id 1
    label "b"
  ]
  node [
    id 2
    label "c"
  ]
  node [
    id 3
    label "d"
  ]
  edge [
    source 0
    target 2
    weight 1
  ]
  edge [
    source 0
    target 3
    weight 3
  ]
  edge [
    source 1
    target 2
    weight 2
  ]
  edge [
    source 2
    target 3
    weight 4
  ]
]

Some examples¶

[10]:

n = 50
k = 6
p =0.3
g = nx.watts_strogatz_graph(n, k, p)

Visual representations of the same graph may look very different¶

Complete graph¶

[11]:

nx.draw_circular(nx.complete_graph(10), alpha=0.5)

../_images/notebooks_A12A_Graph_Algorithms_16_0.png

[12]:

nx.draw(nx.complete_graph(10), alpha=0.5)

../_images/notebooks_A12A_Graph_Algorithms_17_0.png

[13]:

nx.draw_random(nx.complete_graph(10), alpha=0.5)

../_images/notebooks_A12A_Graph_Algorithms_18_0.png

Undirected graph¶

[14]:

nx.draw(nx.krackhardt_kite_graph(), alpha=0.5)

../_images/notebooks_A12A_Graph_Algorithms_20_0.png

Directed graph¶

[15]:

nx.draw(nx.gn_graph(25), alpha=0.5)

Graph Algorithms¶

[16]:

! python3 -m pip install --quiet pydot python-louvain

Search¶

[17]:

g = {
[2,3,4],
[5,6],
[],
[7,8],
[9,10],
[],
[11, 12],
[],
[],
[],
[],
[]
}

[18]:

G = nx.from_dict_of_lists(g)

[19]:

pos = graphviz_layout(G, prog='dot')

[20]:

nx.draw(G, pos, with_labels=True, node_size=600, node_color='lightblue')

../_images/notebooks_A12A_Graph_Algorithms_29_0.png

Depth first search¶

[21]:

list(nx.dfs_tree(G, 1))

[21]:

[1, 2, 5, 9, 10, 6, 3, 4, 7, 11, 12, 8]

Breadth first search¶

[22]:

list(nx.bfs_tree(G, 1))

[22]:

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

Pathfinding¶

[23]:

edges = [
    ('A', 'B', 8),
    ('A', 'C', 5),
    ('A', 'D', 1),
    ('B', 'C', 6),
    ('C', 'D', 3),
    ('C', 'E', 2),
    ('D', 'E', 4)
]

[24]:

G = nx.Graph()
G.add_weighted_edges_from(edges)

[25]:

pos = planar_layout(G)
nx.draw(G,
        pos,
        with_labels=True,
        node_size=600,
        node_color='lightblue')
labels = nx.draw_networkx_edge_labels(
    G,
    pos,
    edge_labels={(x[0], x[1]): str(x[2]) for x in edges},
    font_size=15,
)

../_images/notebooks_A12A_Graph_Algorithms_37_0.png

Shortest path¶

[26]:

nx.dijkstra_path(G, 'A', 'C')

[26]:

['A', 'D', 'C']

Single source shortest path¶

[27]:

list(nx.single_source_dijkstra(G, 'A'))

[27]:

[{'A': 0, 'D': 1, 'C': 4, 'E': 5, 'B': 8},
 {'A': ['A'],
  'B': ['A', 'B'],
  'C': ['A', 'D', 'C'],
  'D': ['A', 'D'],
  'E': ['A', 'D', 'E']}]

All pairs shortest path¶

[28]:

list(nx.all_pairs_dijkstra_path(G))

[28]:

[('A',
  {'A': ['A'],
   'B': ['A', 'B'],
   'C': ['A', 'D', 'C'],
   'D': ['A', 'D'],
   'E': ['A', 'D', 'E']}),
 ('B',
  {'B': ['B'],
   'A': ['B', 'A'],
   'C': ['B', 'C'],
   'D': ['B', 'C', 'D'],
   'E': ['B', 'C', 'E']}),
 ('C',
  {'C': ['C'],
   'A': ['C', 'D', 'A'],
   'B': ['C', 'B'],
   'D': ['C', 'D'],
   'E': ['C', 'E']}),
 ('D',
  {'D': ['D'],
   'A': ['D', 'A'],
   'C': ['D', 'C'],
   'E': ['D', 'E'],
   'B': ['D', 'A', 'B']}),
 ('E',
  {'E': ['E'],
   'C': ['E', 'C'],
   'D': ['E', 'D'],
   'A': ['E', 'D', 'A'],
   'B': ['E', 'C', 'B']})]

Minimal spanning tree¶

[29]:

G_min = nx.minimum_spanning_tree(G)

[30]:

pos = planar_layout(G_min)
nx.draw(G_min,
        pos,
        with_labels=True,
        node_size=600,
        node_color='lightblue')
labels = nx.draw_networkx_edge_labels(
    G_min,
    pos,
    edge_labels={(x[0], x[1]): str(x[2]) for x in edges},
    font_size=15,
)

../_images/notebooks_A12A_Graph_Algorithms_46_0.png

Centrality¶

[31]:

G = nx.karate_club_graph()

[32]:

pos = nx.kamada_kawai_layout(G)

[33]:

nx.draw(G, pos)

../_images/notebooks_A12A_Graph_Algorithms_50_0.png

[34]:

plt.viridis()

<Figure size 432x288 with 0 Axes>

[35]:

values = nx.degree(G)
n_color = np.asarray([values(n) for n in G.nodes()])
nx.draw(G, pos, node_color=n_color)

../_images/notebooks_A12A_Graph_Algorithms_52_0.png

[36]:

help(nx.closeness_centrality)

Help on function closeness_centrality in module networkx.algorithms.centrality.closeness:

closeness_centrality(G, u=None, distance=None, wf_improved=True)
    Compute closeness centrality for nodes.

    Closeness centrality [1]_ of a node `u` is the reciprocal of the
    average shortest path distance to `u` over all `n-1` reachable nodes.

    .. math::

        C(u) = \frac{n - 1}{\sum_{v=1}^{n-1} d(v, u)},

    where `d(v, u)` is the shortest-path distance between `v` and `u`,
    and `n` is the number of nodes that can reach `u`. Notice that the
    closeness distance function computes the incoming distance to `u`
    for directed graphs. To use outward distance, act on `G.reverse()`.

    Notice that higher values of closeness indicate higher centrality.

    Wasserman and Faust propose an improved formula for graphs with
    more than one connected component. The result is "a ratio of the
    fraction of actors in the group who are reachable, to the average
    distance" from the reachable actors [2]_. You might think this
    scale factor is inverted but it is not. As is, nodes from small
    components receive a smaller closeness value. Letting `N` denote
    the number of nodes in the graph,

    .. math::

        C_{WF}(u) = \frac{n-1}{N-1} \frac{n - 1}{\sum_{v=1}^{n-1} d(v, u)},

    Parameters
    ----------
    G : graph
      A NetworkX graph

    u : node, optional
      Return only the value for node u

    distance : edge attribute key, optional (default=None)
      Use the specified edge attribute as the edge distance in shortest
      path calculations

    wf_improved : bool, optional (default=True)
      If True, scale by the fraction of nodes reachable. This gives the
      Wasserman and Faust improved formula. For single component graphs
      it is the same as the original formula.

    Returns
    -------
    nodes : dictionary
      Dictionary of nodes with closeness centrality as the value.

    See Also
    --------
    betweenness_centrality, load_centrality, eigenvector_centrality,
    degree_centrality, incremental_closeness_centrality

    Notes
    -----
    The closeness centrality is normalized to `(n-1)/(|G|-1)` where
    `n` is the number of nodes in the connected part of graph
    containing the node.  If the graph is not completely connected,
    this algorithm computes the closeness centrality for each
    connected part separately scaled by that parts size.

    If the 'distance' keyword is set to an edge attribute key then the
    shortest-path length will be computed using Dijkstra's algorithm with
    that edge attribute as the edge weight.

    The closeness centrality uses *inward* distance to a node, not outward.
    If you want to use outword distances apply the function to `G.reverse()`

    In NetworkX 2.2 and earlier a bug caused Dijkstra's algorithm to use the
    outward distance rather than the inward distance. If you use a 'distance'
    keyword and a DiGraph, your results will change between v2.2 and v2.3.

    References
    ----------
    .. [1] Linton C. Freeman: Centrality in networks: I.
       Conceptual clarification. Social Networks 1:215-239, 1979.
       http://leonidzhukov.ru/hse/2013/socialnetworks/papers/freeman79-centrality.pdf
    .. [2] pg. 201 of Wasserman, S. and Faust, K.,
       Social Network Analysis: Methods and Applications, 1994,
       Cambridge University Press.

[37]:

values = nx.closeness_centrality(G)
n_color = np.asarray([values[n] for n in G.nodes()])
nx.draw(G, pos, node_color=n_color)

../_images/notebooks_A12A_Graph_Algorithms_54_0.png

[38]:

help(nx.pagerank)

Help on function pagerank in module networkx.algorithms.link_analysis.pagerank_alg:

pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1e-06, nstart=None, weight='weight', dangling=None)
Returns the PageRank of the nodes in the graph.

PageRank computes a ranking of the nodes in the graph G based on
the structure of the incoming links. It was originally designed as
an algorithm to rank web pages.

Parameters
----------
G : graph
A NetworkX graph. Undirected graphs will be converted to a directed
graph with two directed edges for each undirected edge.

alpha : float, optional
Damping parameter for PageRank, default=0.85.

personalization: dict, optional
The "personalization vector" consisting of a dictionary with a
key some subset of graph nodes and personalization value each of those.
At least one personalization value must be non-zero.
If not specfiied, a nodes personalization value will be zero.
By default, a uniform distribution is used.

max_iter : integer, optional
Maximum number of iterations in power method eigenvalue solver.

tol : float, optional
Error tolerance used to check convergence in power method solver.

nstart : dictionary, optional
Starting value of PageRank iteration for each node.

weight : key, optional
Edge data key to use as weight. If None weights are set to 1.

dangling: dict, optional
The outedges to be assigned to any "dangling" nodes, i.e., nodes without
any outedges. The dict key is the node the outedge points to and the dict
value is the weight of that outedge. By default, dangling nodes are given
outedges according to the personalization vector (uniform if not
specified). This must be selected to result in an irreducible transition
matrix (see notes under google_matrix). It may be common to have the
dangling dict to be the same as the personalization dict.

Returns
-------
pagerank : dictionary
Dictionary of nodes with PageRank as value

Examples
--------
>>> G = nx.DiGraph(nx.path_graph(4))
>>> pr = nx.pagerank(G, alpha=0.9)

Notes
-----
The eigenvector calculation is done by the power iteration method
and has no guarantee of convergence. The iteration will stop after
an error tolerance of ``len(G) * tol`` has been reached. If the
number of iterations exceed `max_iter`, a
:exc:`networkx.exception.PowerIterationFailedConvergence` exception
is raised.

The PageRank algorithm was designed for directed graphs but this
algorithm does not check if the input graph is directed and will
execute on undirected graphs by converting each edge in the
directed graph to two edges.

See Also
--------
pagerank_numpy, pagerank_scipy, google_matrix

Raises
------
PowerIterationFailedConvergence
If the algorithm fails to converge to the specified tolerance
within the specified number of iterations of the power iteration
method.

References
----------
.. [1] A. Langville and C. Meyer,
"A survey of eigenvector methods of web information retrieval."
http://citeseer.ist.psu.edu/713792.html
.. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
The PageRank citation ranking: Bringing order to the Web. 1999
http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf

[39]:

values = nx.pagerank(G)
n_color = np.asarray([values[n] for n in G.nodes()])
nx.draw(G, pos, node_color=n_color)

../_images/notebooks_A12A_Graph_Algorithms_56_0.png

Community Detection¶

Triangles¶

[40]:

help(nx.triangles)

Help on function triangles in module networkx.algorithms.cluster:

triangles(G, nodes=None)
    Compute the number of triangles.

    Finds the number of triangles that include a node as one vertex.

    Parameters
    ----------
    G : graph
       A networkx graph
    nodes : container of nodes, optional (default= all nodes in G)
       Compute triangles for nodes in this container.

    Returns
    -------
    out : dictionary
       Number of triangles keyed by node label.

    Examples
    --------
    >>> G = nx.complete_graph(5)
    >>> print(nx.triangles(G, 0))
    6
    >>> print(nx.triangles(G))
    {0: 6, 1: 6, 2: 6, 3: 6, 4: 6}
    >>> print(list(nx.triangles(G, (0, 1)).values()))
    [6, 6]

    Notes
    -----
    When computing triangles for the entire graph each triangle is counted
    three times, once at each node.  Self loops are ignored.

[41]:

values = nx.triangles(G)
n_color = np.asarray([values[n] for n in G.nodes()])
nx.draw(G, pos, node_color=n_color)

../_images/notebooks_A12A_Graph_Algorithms_60_0.png

Clustering coefficient¶

[42]:

help(nx.clustering)

Help on function clustering in module networkx.algorithms.cluster:

clustering(G, nodes=None, weight=None)
    Compute the clustering coefficient for nodes.

    For unweighted graphs, the clustering of a node :math:`u`
    is the fraction of possible triangles through that node that exist,

    .. math::

      c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)},

    where :math:`T(u)` is the number of triangles through node :math:`u` and
    :math:`deg(u)` is the degree of :math:`u`.

    For weighted graphs, there are several ways to define clustering [1]_.
    the one used here is defined
    as the geometric average of the subgraph edge weights [2]_,

    .. math::

       c_u = \frac{1}{deg(u)(deg(u)-1))}
             \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}.

    The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight
    in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`.

    The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`.

    For directed graphs, the clustering is similarly defined as the fraction
    of all possible directed triangles or geometric average of the subgraph
    edge weights for unweighted and weighted directed graph respectively [3]_.

    .. math::

       c_u = \frac{1}{deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u)}
             T(u),

    where :math:`T(u)` is the number of directed triangles through node
    :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of
    :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of
    :math:`u`.

    Parameters
    ----------
    G : graph

    nodes : container of nodes, optional (default=all nodes in G)
       Compute clustering for nodes in this container.

    weight : string or None, optional (default=None)
       The edge attribute that holds the numerical value used as a weight.
       If None, then each edge has weight 1.

    Returns
    -------
    out : float, or dictionary
       Clustering coefficient at specified nodes

    Examples
    --------
    >>> G = nx.complete_graph(5)
    >>> print(nx.clustering(G, 0))
    1.0
    >>> print(nx.clustering(G))
    {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}

    Notes
    -----
    Self loops are ignored.

    References
    ----------
    .. [1] Generalizations of the clustering coefficient to weighted
       complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
       K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
       http://jponnela.com/web_documents/a9.pdf
    .. [2] Intensity and coherence of motifs in weighted complex
       networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski,
       Physical Review E, 71(6), 065103 (2005).
    .. [3] Clustering in complex directed networks by G. Fagiolo,
       Physical Review E, 76(2), 026107 (2007).

[43]:

values = nx.clustering(G)
n_color = np.asarray([values[n] for n in G.nodes()])
nx.draw(G, pos, node_color=n_color)

../_images/notebooks_A12A_Graph_Algorithms_63_0.png

[44]:

import matplotlib.colors as mcolors

[45]:

colors = list(mcolors.BASE_COLORS.keys())

Label propagation¶

[46]:

help(nx.community.label_propagation_communities)

Help on function label_propagation_communities in module networkx.algorithms.community.label_propagation:

label_propagation_communities(G)
    Generates community sets determined by label propagation

    Finds communities in `G` using a semi-synchronous label propagation
    method[1]_. This method combines the advantages of both the synchronous
    and asynchronous models. Not implemented for directed graphs.

    Parameters
    ----------
    G : graph
        An undirected NetworkX graph.

    Yields
    ------
    communities : generator
        Yields sets of the nodes in each community.

    Raises
    ------
    NetworkXNotImplemented
       If the graph is directed

    References
    ----------
    .. [1] Cordasco, G., & Gargano, L. (2010, December). Community detection
       via semi-synchronous label propagation algorithms. In Business
       Applications of Social Network Analysis (BASNA), 2010 IEEE International
       Workshop on (pp. 1-8). IEEE.

[47]:

parts = list(nx.community.label_propagation_communities(G))
values = {n: i for i, ns in enumerate(parts) for n in ns}
n_color = np.asarray([values[n] for n in G.nodes()])
nx.draw(G, pos, node_color=n_color)

../_images/notebooks_A12A_Graph_Algorithms_68_0.png

Clauset-Newman-Moore greedy modularity¶

Definition - Modularity is the fraction of the edges that fall within the given groups minus the expected fraction if edges were distributed at random.

[48]:

help(nx.community.greedy_modularity_communities)

Help on function greedy_modularity_communities in module networkx.algorithms.community.modularity_max:

greedy_modularity_communities(G, weight=None)
    Find communities in graph using Clauset-Newman-Moore greedy modularity
    maximization. This method currently supports the Graph class and does not
    consider edge weights.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    Yields sets of nodes, one for each community.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    References
    ----------
    .. [1] M. E. J Newman 'Networks: An Introduction', page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.

[49]:

parts = list(nx.community.greedy_modularity_communities(G))
values = {n: i for i, ns in enumerate(parts) for n in ns}
n_color = np.asarray([values[n] for n in G.nodes()])
nx.draw(G, pos, node_color=n_color)

../_images/notebooks_A12A_Graph_Algorithms_71_0.png

Louvain¶

This works well on large graphs, and is used to cluster single-cell RNA-seq data into cell subsets.

[50]:

import community as community_louvain

[51]:

help(community_louvain.best_partition)

Help on function best_partition in module community.community_louvain:

best_partition(graph, partition=None, weight='weight', resolution=1.0, randomize=None, random_state=None)
    Compute the partition of the graph nodes which maximises the modularity
    (or try..) using the Louvain heuristices

    This is the partition of highest modularity, i.e. the highest partition
    of the dendrogram generated by the Louvain algorithm.

    Parameters
    ----------
    graph : networkx.Graph
       the networkx graph which is decomposed
    partition : dict, optional
       the algorithm will start using this partition of the nodes.
       It's a dictionary where keys are their nodes and values the communities
    weight : str, optional
        the key in graph to use as weight. Default to 'weight'
    resolution :  double, optional
        Will change the size of the communities, default to 1.
        represents the time described in
        "Laplacian Dynamics and Multiscale Modular Structure in Networks",
        R. Lambiotte, J.-C. Delvenne, M. Barahona
    randomize : boolean, optional
        Will randomize the node evaluation order and the community evaluation
        order to get different partitions at each call
    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Returns
    -------
    partition : dictionnary
       The partition, with communities numbered from 0 to number of communities

    Raises
    ------
    NetworkXError
       If the graph is not Eulerian.

    See Also
    --------
    generate_dendrogram to obtain all the decompositions levels

    Notes
    -----
    Uses Louvain algorithm

    References
    ----------
    .. 1. Blondel, V.D. et al. Fast unfolding of communities in
    large networks. J. Stat. Mech 10008, 1-12(2008).

    Examples
    --------
    >>>  #Basic usage
    >>> G=nx.erdos_renyi_graph(100, 0.01)
    >>> part = best_partition(G)

    >>> #other example to display a graph with its community :
    >>> #better with karate_graph() as defined in networkx examples
    >>> #erdos renyi don't have true community structure
    >>> G = nx.erdos_renyi_graph(30, 0.05)
    >>> #first compute the best partition
    >>> partition = community.best_partition(G)
    >>>  #drawing
    >>> size = float(len(set(partition.values())))
    >>> pos = nx.spring_layout(G)
    >>> count = 0.
    >>> for com in set(partition.values()) :
    >>>     count += 1.
    >>>     list_nodes = [nodes for nodes in partition.keys()
    >>>                                 if partition[nodes] == com]
    >>>     nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20,
                                    node_color = str(count / size))
    >>> nx.draw_networkx_edges(G, pos, alpha=0.5)
    >>> plt.show()

[52]:

partion = community_louvain.best_partition(G)
values = {n: i for i, ns in enumerate(parts) for n in ns}
n_color = np.asarray([values[n] for n in G.nodes()])
nx.draw(G, pos, node_color=n_color)

../_images/notebooks_A12A_Graph_Algorithms_75_0.png