# Source code for networkqit.algorithms.community

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# networkqit -- a python module for manipulations of spectral entropies framework
#
# Copyright (C) 2017-2018 Carlo Nicolini <carlo.nicolini@iit.it>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
Here a number of community detection utilities are implemented for the manipulation
of membership vectors, as well as for the detection of block structures in networks.
"""

import autograd.numpy as np

# Public API of this module: block-matrix utilities and membership-vector
# manipulation helpers for community detection.
__all__ =  ['comm_mat',
            'comm_assortativity',
            'reindex_membership',
            'reassign_singletons'
            ]

def comm_mat(adj, memb):
    """
    Return the block matrix of a partition and its density-normalized form.

    Input
    -----
    adj : :class:`~np.array`:
        a possibly weighted n x n adjacency matrix in the form of a
        N x N numpy array
    memb : :class:`~np.array, list`:
        the nodal membership vector, a numpy array or list of integers.

    Output
    -----
    B : :class:`~np.array`:
        the CxC block matrix containing in its (r,s) element the number of
        links from community `r` to community `s`.  Diagonal entries count
        each intra-community link once (the quadratic form counts them
        twice, so the diagonal is halved).
    Bnorm : :class:`~np.array`:
        the CxC block matrix containing in its (r,s) element the density of
        links: B divided by the number of possible node pairs, nr x ns for
        off-diagonal terms and `(nr x (nr-1))/2` for diagonal elements.
        NOTE(review): a singleton community has zero diagonal pairs, so
        Bnorm can contain inf/nan there -- unchanged from the original.

    References
    ----------
    .. [holland-stochastic-1983] Paul W. Holland, Kathryn Blackmond Laskey,
       Samuel Leinhardt, "Stochastic blockmodels: First steps",
       Carnegie-Mellon University, Pittsburgh, PA 15213, U.S.A.,
       :doi:`10.1016/0378-8733(83)90021-7`.
    """
    memb = np.asarray(memb)
    u = np.unique(memb)
    # C is the C x N one-hot community indicator matrix.  Built by
    # broadcasting instead of the original O(C*N) Python double loop.
    C = (memb[None, :] == u[:, None]).astype(float)
    B = np.dot(np.dot(C, adj), C.T)
    # The quadratic form C A C^T counts every intra-community link twice.
    np.fill_diagonal(B, B.diagonal() / 2)
    commsizes = C.sum(axis=1)
    # nr*(nr-1)/2 intra-community pairs on the diagonal...
    commpairs = np.dot(np.diag(commsizes), np.diag(commsizes - 1)) / 2
    # ...and nr*ns pairs for the off-diagonal blocks (outer product).
    commpairs2 = np.dot(np.dot(np.diag(commsizes),
                               np.ones([len(commsizes), len(commsizes)])),
                        np.diag(commsizes))
    blockpairs = np.multiply(commpairs2, (1 - np.eye(len(commsizes)))) + commpairs
    Bnorm = B / blockpairs
    return B, Bnorm
def comm_assortativity(A, memb):
    """
    Compute the modular group assortativity and the Newman modularity,
    following Eq. 18-19 of the Peixoto paper "Nonparametric weighted
    stochastic block models" https://arxiv.org/pdf/1708.01432.pdf

    Args:
        A (np.array): the (weighted) adjacency matrix
        memb (list): the node block membership

    Output:
        qr: the group modular assortativity
        Q: the Newman modularity

    References
    ----------
    .. [peixoto2017-weighted] Tiago Peixoto,
       "Non parametric weighted stochastic block model",
       https://arxiv.org/pdf/1708.01432.pdf
       :url:`https://arxiv.org/pdf/1708.01432.pdf`.
    """
    num_blocks = len(np.unique(memb))
    total_edges = np.triu(A, 1).sum()
    ers, _ = comm_mat(A, memb)
    # comm_mat halves the diagonal; restore the 2*e_rr SBM convention.
    np.fill_diagonal(ers, 2 * np.diagonal(ers))
    err = np.diagonal(ers)
    er2 = np.sum(ers, axis=0) ** 2
    two_e = 2.0 * total_edges
    qr = num_blocks / two_e * (err - er2 / two_e)
    Q = np.mean(qr)  # Newman modularity
    return qr, Q
def reindex_membership(memb, key='community_size', compress_singletons=False, adj=None):
    """
    Reindex a membership vector so that community labels are ordered by a
    sorting criterion (by default, decreasing number of nodes per community).

    Args:
        memb (list): a list of node memberships.
        key (str): by default 'community_size' sorts the community indices
                   by decreasing number of nodes in each community.  Other
                   arguments are 'community_weight' (sum of all internal
                   community weights) or 'community_normalized_weight'
                   (internal weight normalized by internal pairs); both of
                   these require `adj`.
        compress_singletons (bool): if set to True assign all singleton
                   communities into a single community.
        adj (np.array): the adjacency matrix from which the community
                   weights are read (required for the weight-based keys).

    Returns:
        list: the reindexed membership vector.

    Raises:
        AssertionError: if a weight-based key is requested but adj is None.
    """
    # Group node indices by their community label (insertion order kept).
    ds = {}
    for node, comm in enumerate(memb):
        ds.setdefault(comm, []).append(node)

    def community_size(x):
        return len(x)

    def community_weight(x):
        return adj[np.ix_(x, x)].sum()

    def community_normalized_weight(x):
        # NOTE(review): operator precedence makes the divisor n*n - 1,
        # not n*(n-1); kept as-is to preserve existing behavior -- confirm
        # against the intended "internal pairs" normalization.
        return adj[np.ix_(x, x)].sum() / (len(x) * len(x) - 1)

    mykey = None
    # BUGFIX: the original compared strings with `is`/`is not`, which tests
    # object identity, not equality -- it only worked by accident of CPython
    # string interning.  Use `==`/`!=` instead.
    if key != 'community_size' and adj is None:
        raise AssertionError('Must input adjacency matrix too')
    else:
        if key == 'community_size':
            mykey = community_size
        elif key == 'community_weight':
            mykey = community_weight
        elif key == 'community_normalized_weight':
            mykey = community_normalized_weight
    # Map new labels 0..C-1 onto the community node-lists, sorted by the key.
    S = dict(zip(range(0, len(ds)),
                 sorted(ds.values(), key=mykey, reverse=True)))
    memo_reindex = [-1] * len(memb)
    for new_label, nodes in S.items():
        for v in nodes:
            memo_reindex[v] = new_label
    if compress_singletons:
        memo_reindex = reassign_singletons(memo_reindex)
    return memo_reindex
def reassign_singletons(memb):
    """
    Set all the singleton communities into the same community.
    If membership has C communities, S of which are singletons, then
    this function sets the singleton communities with the label C + 1

    Args:
        memb (np.array): the input membership vector

    Output:
        an array where all the nodes with a single community are merged
        into one.
    """
    # Reindexed copy of the membership: labels become 0..C-1, ordered by
    # decreasing community size.
    memb2 = np.array(reindex_membership(memb))
    # Fallback fresh label, used when no non-singleton community exists.
    max_memb = np.max(memb2) + 1
    memb3 = memb2.copy()
    # NOTE(review): this scan uses the ORIGINAL labels in `memb`, while
    # memb3 carries the reindexed labels from memb2 -- the fresh label is
    # derived from the original label space; confirm this is intended.
    for iu in np.unique(memb):
        ix = np.where(memb == iu)[0]
        if len(ix) > 1:
            # Remember the largest non-singleton label (np.unique is sorted
            # ascending, so the last assignment wins).
            max_memb = iu
    max_memb += 1
    # Relabel every node that sits alone in its community with the fresh
    # shared label, merging all singletons together.
    for iu in np.unique(memb):
        ix = np.where(memb == iu)[0]
        if len(ix) == 1:
            memb3[ix] = max_memb
    # Normalize labels again so they are contiguous and size-ordered.
    memb3 = reindex_membership(memb3)
    return memb3