#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# networkqit -- a python module for manipulation of the spectral entropies framework
#
# Copyright (C) 2017-2018 Carlo Nicolini <carlo.nicolini@iit.it>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
This module implements a number of community detection utilities for the manipulation
of membership vectors, as well as for the detection of block structures in networks.
"""
import autograd.numpy as np
__all__ = ['comm_mat',
           'comm_assortativity',
           'reindex_membership',
           'reassign_singletons']
def comm_mat(adj, memb):
"""
Returns multiple community related quantities.
Input
-----
adj : :class:`~np.array`: a possibly weighted n x n adjacency matrix in the form of a N x N numpy array
memb: :class:`~np.array, list`: the nodal membership vector, a numpy array or list of integers.
Ouput
-----
B: :class`~np.array`: the CxC block matrix containing in its (r,s) element the number of links from community
`r` to community `s`.
Bnorm: :class`~np.array`: the CxC block matrix containing in its (r,s) element the density of links.
It is the same as B but divided by the actual number of possible node pairs nr x ns for off-diagonal terms,
`(nr x (nr-1))/2` for diagonal elements.
References
----------
.. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey,
Samuel Leinhardt, "Stochastic blockmodels: First steps",
Carnegie-Mellon University, Pittsburgh, PA 15213, U.S.A.,
:doi:`10.1016/0378-8733(83)90021-7`.
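
    Example
    -------
    A minimal illustrative sketch (not part of the original documentation): a 4-node
    graph with two planted communities of two nodes each, so B counts one link inside
    each community and one link between them.

    >>> A = np.array([[0, 1, 1, 0],
    ...               [1, 0, 0, 0],
    ...               [1, 0, 0, 1],
    ...               [0, 0, 1, 0]])
    >>> memb = [0, 0, 1, 1]
    >>> B, Bnorm = comm_mat(A, memb)
    >>> B.shape
    (2, 2)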
"""
    u = np.unique(memb)
    # Build the C x N indicator matrix: C[i, j] = 1 if node j belongs to community u[i]
    C = np.zeros([len(u), len(memb)])
    for i in range(C.shape[0]):
        for j in range(len(memb)):
            C[i, j] = memb[j] == u[i]
    # Block matrix of link counts/weights; intra-community links are counted twice here
    B = np.dot(np.dot(C, adj), C.T)
    # Halve the diagonal so that intra-community links are counted once
    np.fill_diagonal(B, B.diagonal() / 2)
    commsizes = C.sum(axis=1)
    # Number of node pairs within each community: nr * (nr - 1) / 2 on the diagonal
    commpairs = np.dot(np.diag(commsizes), np.diag(commsizes - 1)) / 2
    # Number of node pairs between communities: nr * ns for every pair (r, s)
    commpairs2 = np.dot(np.dot(np.diag(commsizes), np.ones([len(commsizes), len(commsizes)])), np.diag(commsizes))
    # Off-diagonal pairs from commpairs2, diagonal pairs from commpairs
    blockpairs = np.multiply(commpairs2, (1 - np.eye(len(commsizes)))) + commpairs
    Bnorm = B / blockpairs
    return B, Bnorm
def comm_assortativity(A, memb):
"""
This function computes the modular group assortativity and the newman modularity as from Eq.18-19
of the Peixoto paper "Nonparametric weighted stochastic block models"
https://arxiv.org/pdf/1708.01432.pdf
Args:
A (np.array): the (weighted) adjacency matrix
memb (list): the node block membership
Output:
qr: the group modular assortativity
Q: the Newman modularity
References
----------
.. [peixoto2017-weighted] Tiago Peixoto, "Non parametric weighted stochastic block model",
https://arxiv.org/pdf/1708.01432.pdf
:url:`https://arxiv.org/pdf/1708.01432.pdf`.
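
    Example
    -------
    A minimal illustrative sketch (not part of the original documentation), on a
    4-node graph with two communities of two nodes each; qr holds one assortativity
    value per block.

    >>> A = np.array([[0, 1, 1, 0],
    ...               [1, 0, 0, 0],
    ...               [1, 0, 0, 1],
    ...               [0, 0, 1, 0]])
    >>> qr, Q = comm_assortativity(A, [0, 0, 1, 1])
    >>> qr.shape
    (2,)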
"""
    B = len(np.unique(memb))   # number of blocks
    E = np.triu(A, 1).sum()    # total number (or weight) of edges
    ers, ersnorm = comm_mat(A, memb)
    # Double the diagonal so that e_rr counts each intra-block edge twice, following Peixoto's convention
    np.fill_diagonal(ers, 2 * np.diagonal(ers))
    err = np.diagonal(ers)           # e_rr
    er2 = np.sum(ers, axis=0) ** 2   # e_r^2
    qr = B / (2.0 * E) * (err - (er2 / (2.0 * E)))
    Q = np.mean(qr)  # Newman modularity
    return qr, Q
def reindex_membership(memb, key='community_size', compress_singletons=False, adj=None):
"""
This function has the membership as input and output the membership
where the communities number are ordered by the number of nodes in that community
Args:
memb (list): a list of nodes membership
key (str): by default it sorts the community indices by decreasing number of nodes in each community.
other arguments are 'community_weight' (sum of all internal community weights)
or 'community_normalized_weight' (internal weight normalized by internal pairs).
Important
compress_singletons (bool): if set to True assign all singleton communities into a single community.
adj (np.array): the adjacency matrix where to read the
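
    Example
    -------
    A minimal illustrative sketch (not part of the original documentation): the largest
    community receives label 0, and with `compress_singletons=True` the two singleton
    nodes end up in the same community.

    >>> reindex_membership([5, 5, 0, 0, 0])
    [1, 1, 0, 0, 0]
    >>> reindex_membership([7, 7, 7, 2, 5], compress_singletons=True)
    [0, 0, 0, 1, 1]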
"""
    # Group node indices by their community label
    ds = {}
    for u, v in enumerate(memb):
        if v not in ds.keys():
            ds[v] = []
        ds[v].append(u)

    def community_size(x):
        return len(x)

    def community_weight(x):
        return adj[np.ix_(x, x)].sum()

    def community_normalized_weight(x):
        return adj[np.ix_(x, x)].sum() / (len(x) * (len(x) - 1))

    mykey = None
    if key != 'community_size' and adj is None:
        raise AssertionError('Must input adjacency matrix too')
    else:
        if key == 'community_size':
            mykey = community_size
        elif key == 'community_weight':
            mykey = community_weight
        elif key == 'community_normalized_weight':
            mykey = community_normalized_weight
    # Sort the communities by the chosen criterion and map them to the labels 0..C-1
    S = dict(zip(range(0, len(ds)), sorted(ds.values(), key=mykey, reverse=True)))
    memo_reindex = [-1] * len(memb)
    for u, vl in S.items():
        for v in vl:
            memo_reindex[v] = u
    if compress_singletons:
        memo_reindex = reassign_singletons(memo_reindex)
    return memo_reindex
def reassign_singletons(memb):
"""
Set all the singleton communities into the same community.
If membership has C communities, S of which are singletons, then
this function sets the singleton communities with the label C + 1
Args:
memb (np.array): the input membership vector
Output:
an array where all the nodes with a single community are merged
into one.
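
    Example
    -------
    A minimal illustrative sketch (not part of the original documentation): the two
    singleton nodes are merged into the same community.

    >>> reassign_singletons([0, 0, 0, 1, 2])
    [0, 0, 0, 1, 1]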
"""
    # Relabel the communities by decreasing size first
    memb2 = np.array(reindex_membership(memb))
    max_memb = np.max(memb2) + 1
    memb3 = memb2.copy()
    # Find the largest label among the non-singleton communities;
    # the next label is used for the merged singletons
    for iu in np.unique(memb):
        ix = np.where(memb == iu)[0]
        if len(ix) > 1:
            max_memb = iu
    max_memb += 1
    # Assign every singleton node to that common label
    for iu in np.unique(memb):
        ix = np.where(memb == iu)[0]
        if len(ix) == 1:
            memb3[ix] = max_memb
    # Reindex again so that labels are contiguous and sorted by community size
    memb3 = reindex_membership(memb3)
    return memb3