Graph Analytics with NetworkX
Graphs model relationships between entities β social networks, fraud rings, citation networks, and knowledge graphs. Learn to analyze them with NetworkX.
Graph Fundamentals
import networkx as nx
import numpy as np
from collections import defaultdict
# Create a graph
G = nx.Graph()
# Add nodes with attributes
G.add_node(1, name="Alice", age=30, role="engineer")
G.add_node(2, name="Bob", age=35, role="manager")
G.add_node(3, name="Charlie", age=28, role="engineer")
# Add edges with weights
G.add_edge(1, 2, weight=0.8, relationship="colleague")
G.add_edge(2, 3, weight=0.6, relationship="colleague")
G.add_edge(1, 3, weight=0.9, relationship="friend")
# Directed graph
DG = nx.DiGraph()
DG.add_edge("A", "B", weight=3)
DG.add_edge("B", "C", weight=2)
DG.add_edge("C", "A", weight=1)
# Graph properties
print(f"Nodes: {G.number_of_nodes()}")
print(f"Edges: {G.number_of_edges()}")
print(f"Density: {nx.density(G):.4f}")
print(f"Is connected: {nx.is_connected(G)}")
Centrality Measures
def analyze_centrality(G):
"""Compute and compare different centrality measures"""
results = {}
# Degree centrality (popularity)
results['degree'] = nx.degree_centrality(G)
# Betweenness centrality (bridge/broker role)
results['betweenness'] = nx.betweenness_centrality(G, weight='weight')
# Closeness centrality (access to all nodes)
results['closeness'] = nx.closeness_centrality(G)
# Eigenvector centrality (influence)
try:
results['eigenvector'] = nx.eigenvector_centrality(G, max_iter=1000)
except nx.PowerIterationFailedConvergence:
results['eigenvector'] = {}
# PageRank (importance)
results['pagerank'] = nx.pagerank(G, alpha=0.85, weight='weight')
# Combine into DataFrame
import pandas as pd
df = pd.DataFrame(results)
df.index.name = 'node'
return df.sort_values('degree', ascending=False)
centrality_df = analyze_centrality(G)
print(centrality_df)
# Identify key influencers
top_nodes = centrality_df.nlargest(3, 'pagerank')
print("\nTop Influencers:")
print(top_nodes[['degree', 'pagerank', 'betweenness']])
Community Detection
import networkx as nx
from networkx.algorithms import community
import numpy as np
def detect_communities(G):
"""Apply multiple community detection algorithms"""
# Louvain method (modularity optimization)
louvain = community.louvain_communities(G, weight='weight', resolution=1.0)
modularity = community.modularity(G, louvain, weight='weight')
print(f"Louvain: {len(louvain)} communities, modularity={modularity:.4f}")
# Label Propagation (fast, near-linear)
label_prop = list(community.label_propagation_communities(G))
print(f"Label Propagation: {len(label_prop)} communities")
# Girvan-Newman (hierarchical)
comp = community.girvan_newman(G)
top_level = next(comp)
print(f"Girvan-Newman: {len(top_level)} communities")
# Greedy modularity
greedy = community.greedy_modularity_communities(G, weight='weight')
print(f"Greedy: {len(greedy)} communities, modularity={community.modularity(G, greedy):.4f}")
return {
'louvain': louvain,
'label_propagation': label_prop,
'greedy': greedy
}
# Create test graph with community structure
G_comm = nx.planted_partition_graph(4, 20, 0.3, 0.01, seed=42)
communities = detect_communities(G_comm)
# Visualize
import matplotlib.pyplot as plt
pos = nx.spring_layout(G_comm, seed=42)
colors = plt.cm.Set3(np.linspace(0, 1, len(communities['louvain'])))
for i, comm in enumerate(communities['louvain']):
nx.draw_networkx_nodes(G_comm, pos, nodelist=list(comm),
node_color=[colors[i]], node_size=50)
nx.draw_networkx_edges(G_comm, pos, alpha=0.3)
plt.show()
Graph Algorithms
# Shortest paths
def shortest_paths_analysis(G):
"""Analyze connectivity and distances"""
# All pairs shortest path
shortest = dict(nx.all_pairs_shortest_path_length(G))
# Diameter and average path length
if nx.is_connected(G):
diameter = nx.diameter(G)
avg_path = nx.average_shortest_path_length(G)
print(f"Diameter: {diameter}")
print(f"Average path length: {avg_path:.4f}")
# Betweenness centrality for bridge detection
edge_betweenness = nx.edge_betweenness_centrality(G, weight='weight')
top_bridges = sorted(edge_betweenness.items(), key=lambda x: x[1], reverse=True)[:5]
return {
'diameter': diameter if nx.is_connected(G) else None,
'top_bridges': top_bridges
}
# Minimum spanning tree
def spanning_tree_analysis(G):
"""Find minimum spanning tree for network design"""
mst = nx.minimum_spanning_tree(G, algorithm='kruskal')
total_weight = sum(d['weight'] for u, v, d in mst.edges(data=True))
print(f"MST total weight: {total_weight:.4f}")
return mst
# Bipartite graph analysis
from networkx.algorithms import bipartite
B = nx.Graph()
B.add_nodes_from([1, 2, 3], bipartite=0) # Users
B.add_nodes_from(['a', 'b', 'c'], bipartite=1) # Items
B.add_edges_from([(1, 'a'), (1, 'b'), (2, 'b'), (2, 'c'), (3, 'a'), (3, 'c')])
# Maximum matching
matching = bipartite.maximum_matching(B)
print(f"Maximum matching: {matching}")
# Projection
user_projection = bipartite.projected_graph(B, [1, 2, 3])
item_projection = bipartite.projected_graph(B, ['a', 'b', 'c'])
Knowledge Graphs
import networkx as nx
from rdflib import Graph, Literal, RDF, URIRef
from rdflib.namespace import FOAF, XSD
class KnowledgeGraph:
def __init__(self):
self.graph = nx.DiGraph()
self.rdf_graph = Graph()
def add_entity(self, entity_id, entity_type, attributes=None):
self.graph.add_node(entity_id, type=entity_type, **(attributes or {}))
def add_relation(self, subject, predicate, obj, **attrs):
self.graph.add_edge(subject, obj, relation=predicate, **attrs)
def query_path(self, start, end, max_length=3):
"""Find all paths between entities up to max_length"""
try:
paths = list(nx.all_simple_paths(self.graph, start, end, cutoff=max_length))
return paths
except nx.NetworkXNoPath:
return []
def infer_type(self, entity, relation_chain):
"""Infer entity type through relation chain"""
current = {entity}
for relation in relation_chain:
next_nodes = set()
for node in current:
for neighbor in self.graph.neighbors(node):
edge_data = self.graph[node][neighbor]
if edge_data.get('relation') == relation:
next_nodes.add(neighbor)
current = next_nodes
return current
# Build a knowledge graph
kg = KnowledgeGraph()
kg.add_entity("alice", "Person", {"age": 30})
kg.add_entity("google", "Company", {"founded": 1998})
kg.add_entity("python", "Language", {"paradigm": "multi"})
kg.add_relation("alice", "works_at", "google")
kg.add_relation("alice", "knows", "python")
# Query
paths = kg.query_path("alice", "google")
print(f"Paths from alice to google: {paths}")
Best Practices
- Choose the right graph type β directed vs undirected, weighted vs unweighted
- Handle large graphs with sampling or approximate algorithms
- Use community detection to simplify complex networks
- Combine centrality measures for more robust analysis
- Visualize results to communicate findings effectively