Graph Analytics with NetworkX

Graphs model relationships between entities – social networks, fraud rings, citation networks, and knowledge graphs. Learn to analyze them with NetworkX.

Graph Fundamentals

PR(v_i) = \frac{1-d}{N} + d \sum_{v_j \in In(v_i)} \frac{PR(v_j)}{|Out(v_j)|}

import networkx as nx
import numpy as np
from collections import defaultdict

# Create a graph
G = nx.Graph()

# Add nodes with attributes
G.add_node(1, name="Alice", age=30, role="engineer")
G.add_node(2, name="Bob", age=35, role="manager")
G.add_node(3, name="Charlie", age=28, role="engineer")

# Add edges with weights
G.add_edge(1, 2, weight=0.8, relationship="colleague")
G.add_edge(2, 3, weight=0.6, relationship="colleague")
G.add_edge(1, 3, weight=0.9, relationship="friend")

# Directed graph
DG = nx.DiGraph()
DG.add_edge("A", "B", weight=3)
DG.add_edge("B", "C", weight=2)
DG.add_edge("C", "A", weight=1)

# Graph properties
print(f"Nodes: {G.number_of_nodes()}")
print(f"Edges: {G.number_of_edges()}")
print(f"Density: {nx.density(G):.4f}")
print(f"Is connected: {nx.is_connected(G)}")

Centrality Measures

def analyze_centrality(G):
    """Compute and compare different centrality measures"""
    results = {}
    
    # Degree centrality (popularity)
    results['degree'] = nx.degree_centrality(G)
    
    # Betweenness centrality (bridge/broker role)
    results['betweenness'] = nx.betweenness_centrality(G, weight='weight')
    
    # Closeness centrality (access to all nodes)
    results['closeness'] = nx.closeness_centrality(G)
    
    # Eigenvector centrality (influence)
    try:
        results['eigenvector'] = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        results['eigenvector'] = {}
    
    # PageRank (importance)
    results['pagerank'] = nx.pagerank(G, alpha=0.85, weight='weight')
    
    # Combine into DataFrame
    import pandas as pd
    df = pd.DataFrame(results)
    df.index.name = 'node'
    return df.sort_values('degree', ascending=False)

centrality_df = analyze_centrality(G)
print(centrality_df)

# Identify key influencers
top_nodes = centrality_df.nlargest(3, 'pagerank')
print("\nTop Influencers:")
print(top_nodes[['degree', 'pagerank', 'betweenness']])

Community Detection

import networkx as nx
from networkx.algorithms import community
import numpy as np

def detect_communities(G):
    """Apply multiple community detection algorithms"""
    
    # Louvain method (modularity optimization)
    louvain = community.louvain_communities(G, weight='weight', resolution=1.0)
    modularity = community.modularity(G, louvain, weight='weight')
    print(f"Louvain: {len(louvain)} communities, modularity={modularity:.4f}")
    
    # Label Propagation (fast, near-linear)
    label_prop = list(community.label_propagation_communities(G))
    print(f"Label Propagation: {len(label_prop)} communities")
    
    # Girvan-Newman (hierarchical)
    comp = community.girvan_newman(G)
    top_level = next(comp)
    print(f"Girvan-Newman: {len(top_level)} communities")
    
    # Greedy modularity
    greedy = community.greedy_modularity_communities(G, weight='weight')
    print(f"Greedy: {len(greedy)} communities, modularity={community.modularity(G, greedy):.4f}")
    
    return {
        'louvain': louvain,
        'label_propagation': label_prop,
        'greedy': greedy
    }

# Create test graph with community structure
G_comm = nx.planted_partition_graph(4, 20, 0.3, 0.01, seed=42)
communities = detect_communities(G_comm)

# Visualize
import matplotlib.pyplot as plt

pos = nx.spring_layout(G_comm, seed=42)
colors = plt.cm.Set3(np.linspace(0, 1, len(communities['louvain'])))

for i, comm in enumerate(communities['louvain']):
    nx.draw_networkx_nodes(G_comm, pos, nodelist=list(comm), 
                           node_color=[colors[i]], node_size=50)

nx.draw_networkx_edges(G_comm, pos, alpha=0.3)
plt.show()

Graph Algorithms

# Shortest paths
def shortest_paths_analysis(G):
    """Analyze connectivity and distances"""
    
    # All pairs shortest path
    shortest = dict(nx.all_pairs_shortest_path_length(G))
    
    # Diameter and average path length
    if nx.is_connected(G):
        diameter = nx.diameter(G)
        avg_path = nx.average_shortest_path_length(G)
        print(f"Diameter: {diameter}")
        print(f"Average path length: {avg_path:.4f}")
    
    # Betweenness centrality for bridge detection
    edge_betweenness = nx.edge_betweenness_centrality(G, weight='weight')
    top_bridges = sorted(edge_betweenness.items(), key=lambda x: x[1], reverse=True)[:5]
    
    return {
        'diameter': diameter if nx.is_connected(G) else None,
        'top_bridges': top_bridges
    }

# Minimum spanning tree
def spanning_tree_analysis(G):
    """Find minimum spanning tree for network design"""
    mst = nx.minimum_spanning_tree(G, algorithm='kruskal')
    total_weight = sum(d['weight'] for u, v, d in mst.edges(data=True))
    print(f"MST total weight: {total_weight:.4f}")
    return mst

# Bipartite graph analysis
from networkx.algorithms import bipartite

B = nx.Graph()
B.add_nodes_from([1, 2, 3], bipartite=0)  # Users
B.add_nodes_from(['a', 'b', 'c'], bipartite=1)  # Items
B.add_edges_from([(1, 'a'), (1, 'b'), (2, 'b'), (2, 'c'), (3, 'a'), (3, 'c')])

# Maximum matching
matching = bipartite.maximum_matching(B)
print(f"Maximum matching: {matching}")

# Projection
user_projection = bipartite.projected_graph(B, [1, 2, 3])
item_projection = bipartite.projected_graph(B, ['a', 'b', 'c'])

Knowledge Graphs

import networkx as nx
from rdflib import Graph, Literal, RDF, URIRef
from rdflib.namespace import FOAF, XSD

class KnowledgeGraph:
    def __init__(self):
        self.graph = nx.DiGraph()
        self.rdf_graph = Graph()
    
    def add_entity(self, entity_id, entity_type, attributes=None):
        self.graph.add_node(entity_id, type=entity_type, **(attributes or {}))
    
    def add_relation(self, subject, predicate, obj, **attrs):
        self.graph.add_edge(subject, obj, relation=predicate, **attrs)
    
    def query_path(self, start, end, max_length=3):
        """Find all paths between entities up to max_length"""
        try:
            paths = list(nx.all_simple_paths(self.graph, start, end, cutoff=max_length))
            return paths
        except nx.NetworkXNoPath:
            return []
    
    def infer_type(self, entity, relation_chain):
        """Infer entity type through relation chain"""
        current = {entity}
        for relation in relation_chain:
            next_nodes = set()
            for node in current:
                for neighbor in self.graph.neighbors(node):
                    edge_data = self.graph[node][neighbor]
                    if edge_data.get('relation') == relation:
                        next_nodes.add(neighbor)
            current = next_nodes
        return current

# Build a knowledge graph
kg = KnowledgeGraph()
kg.add_entity("alice", "Person", {"age": 30})
kg.add_entity("google", "Company", {"founded": 1998})
kg.add_entity("python", "Language", {"paradigm": "multi"})
kg.add_relation("alice", "works_at", "google")
kg.add_relation("alice", "knows", "python")

# Query
paths = kg.query_path("alice", "google")
print(f"Paths from alice to google: {paths}")

Best Practices

Choose the right graph type – directed vs undirected, weighted vs unweighted
Handle large graphs with sampling or approximate algorithms
Use community detection to simplify complex networks
Combine centrality measures for more robust analysis
Visualize results to communicate findings effectively