Community Detection Examples¶
This page demonstrates how to detect communities in graphs using PyGraphina.
Example 1: Basic Label Propagation¶
Label Propagation is a fast, parameter-free community detection algorithm.
import pygraphina as pg
# Create a graph with clear community structure
g = pg.PyGraph()
nodes = [g.add_node(i) for i in range(12)]
# Community 1: nodes 0-3 (densely connected)
for i in range(4):
for j in range(i+1, 4):
g.add_edge(nodes[i], nodes[j], 1.0)
# Community 2: nodes 4-7 (densely connected)
for i in range(4, 8):
for j in range(i+1, 8):
g.add_edge(nodes[i], nodes[j], 1.0)
# Community 3: nodes 8-11 (densely connected)
for i in range(8, 12):
for j in range(i+1, 12):
g.add_edge(nodes[i], nodes[j], 1.0)
# Add a few inter-community edges
g.add_edge(nodes[3], nodes[4], 1.0)
g.add_edge(nodes[7], nodes[8], 1.0)
# Detect communities
communities = pg.community.label_propagation(g, max_iter=100)
# Group nodes by community
from collections import defaultdict
comm_groups = defaultdict(list)
for node, comm_id in communities.items():
comm_groups[comm_id].append(node)
print(f"Detected {len(comm_groups)} communities:")
for comm_id, members in comm_groups.items():
print(f" Community {comm_id}: {sorted(members)}")
Example 2: Louvain Method on Karate Club¶
The Louvain method optimizes modularity to find communities.
import pygraphina as pg
# Create a graph similar to Zachary's Karate Club
g = pg.PyGraph()
nodes = [g.add_node(i) for i in range(34)]
# Add edges (simplified karate club structure)
edges = [
(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8),
(0, 10), (0, 11), (0, 12), (0, 13), (0, 17), (0, 19), (0, 21), (0, 31),
(1, 2), (1, 3), (1, 7), (1, 13), (1, 17), (1, 19), (1, 21), (1, 30),
(2, 3), (2, 7), (2, 8), (2, 9), (2, 13), (2, 27), (2, 28), (2, 32),
(3, 7), (3, 12), (3, 13), (4, 6), (4, 10), (5, 6), (5, 10), (5, 16),
(6, 16), (8, 30), (8, 32), (8, 33), (9, 33), (13, 33), (14, 32), (14, 33),
(15, 32), (15, 33), (18, 32), (18, 33), (19, 33), (20, 32), (20, 33),
(22, 32), (22, 33), (23, 25), (23, 27), (23, 29), (23, 32), (23, 33),
(24, 25), (24, 27), (24, 31), (25, 31), (26, 29), (26, 33), (27, 33),
(28, 31), (28, 33), (29, 32), (29, 33), (30, 32), (30, 33), (31, 32),
(31, 33), (32, 33),
]
for u, v in edges:
g.add_edge(nodes[u], nodes[v], 1.0)
# Apply Louvain algorithm
communities = pg.community.louvain(g)
# communities is a list of lists: [[node1, node2, ...], [node3, ...], ...]
print(f"Found {len(communities)} communities:")
for comm_id, members in enumerate(communities):
print(f" Community {comm_id}: {len(members)} members")
print(f" Nodes: {sorted(members)[:10]}{'...' if len(members) > 10 else ''}")
# Analyze community structure
for comm_id, members in enumerate(communities):
if len(members) > 0:
# Create subgraph for this community
subg = g.subgraph(members)
density = subg.density() if subg.node_count() > 1 else 0
print(f"\n Community {comm_id} density: {density:.3f}")
Example 3: Girvan-Newman Algorithm¶
Girvan-Newman finds communities by iteratively removing edges with high betweenness.
import pygraphina as pg
# Create a graph with two connected clusters
g = pg.PyGraph()
nodes = [g.add_node(i) for i in range(8)]
# Cluster 1
g.add_edge(nodes[0], nodes[1], 1.0)
g.add_edge(nodes[1], nodes[2], 1.0)
g.add_edge(nodes[2], nodes[0], 1.0)
g.add_edge(nodes[1], nodes[3], 1.0)
# Bridge edge (will be detected as between-community)
g.add_edge(nodes[3], nodes[4], 1.0)
# Cluster 2
g.add_edge(nodes[4], nodes[5], 1.0)
g.add_edge(nodes[5], nodes[6], 1.0)
g.add_edge(nodes[6], nodes[4], 1.0)
g.add_edge(nodes[5], nodes[7], 1.0)
# Detect communities (returns list of lists)
communities = pg.community.girvan_newman(g, 2)
# Display results
print(f"Found {len(communities)} communities:")
for comm_id, members in enumerate(communities):
print(f"Community {comm_id}: {sorted(members)}")
Example 4: Spectral Clustering¶
Spectral clustering uses the graph Laplacian to find communities.
import pygraphina as pg
# Create a graph with three communities
g = pg.PyGraph()
# Add nodes in groups
community_sizes = [5, 6, 4]
nodes = []
for i in range(sum(community_sizes)):
nodes.append(g.add_node(i))
# Build communities with high internal connectivity
offset = 0
for size in community_sizes:
# Connect nodes within community
for i in range(size):
for j in range(i+1, size):
g.add_edge(nodes[offset+i], nodes[offset+j], 1.0)
offset += size
# Add sparse inter-community edges
g.add_edge(nodes[4], nodes[5], 0.5) # Between community 0 and 1
g.add_edge(nodes[10], nodes[11], 0.5) # Between community 1 and 2
# Apply spectral clustering (returns list of lists)
k = 3 # Number of communities
communities = pg.community.spectral_clustering(g, k)
# Analyze results
print(f"Found {len(communities)} communities:")
for comm_id, members in enumerate(communities):
print(f" Community {comm_id}: {len(members)} nodes: {sorted(members)}")
Example 5: Connected Components¶
For disconnected graphs, connected components form natural communities.
import pygraphina as pg
# Create a graph with multiple disconnected components
g = pg.PyGraph()
nodes = [g.add_node(i) for i in range(15)]
# Component 1: nodes 0-4
edges_1 = [(0,1), (1,2), (2,3), (3,4), (4,0)]
for u, v in edges_1:
g.add_edge(nodes[u], nodes[v], 1.0)
# Component 2: nodes 5-9
edges_2 = [(5,6), (6,7), (7,8), (8,9), (9,5)]
for u, v in edges_2:
g.add_edge(nodes[u], nodes[v], 1.0)
# Component 3: nodes 10-14
edges_3 = [(10,11), (11,12), (12,13), (13,14)]
for u, v in edges_3:
g.add_edge(nodes[u], nodes[v], 1.0)
# Find connected components (returns list of lists)
components = pg.community.connected_components(g)
print(f"Found {len(components)} components:")
for comp_id, members in enumerate(components):
print(f" Component {comp_id}: {sorted(members)}")
Example 6: Comparing Community Detection Methods¶
import pygraphina as pg
import time
# Create a test graph
g = pg.core.barabasi_albert(100, 3, seed=42)
# Test different algorithms
algorithms = [
("Label Propagation", lambda: pg.community.label_propagation(g, 100)),
("Louvain", lambda: pg.community.louvain(g)),
("Connected Components", lambda: pg.community.connected_components(g)),
]
results = {}
for name, algo in algorithms:
start = time.time()
communities = algo()
elapsed = time.time() - start
# Count communities - handle both dict and list return types
if isinstance(communities, dict):
num_communities = len(set(communities.values()))
else:
num_communities = len(communities) # list of lists
results[name] = {
'communities': num_communities,
'time': elapsed
}
print(f"{name}:")
print(f" Communities: {num_communities}")
print(f" Time: {elapsed:.4f}s")
Example 7: Hierarchical Community Detection¶
import pygraphina as pg
# Create a hierarchical structure
g = pg.PyGraph()
nodes = [g.add_node(i) for i in range(20)]
# Level 1: Two major groups
# Group A: nodes 0-9
for i in range(10):
for j in range(i+1, 10):
if (i % 5) == (j % 5) or abs(i - j) == 1:
g.add_edge(nodes[i], nodes[j], 1.0)
# Group B: nodes 10-19
for i in range(10, 20):
for j in range(i+1, 20):
if ((i-10) % 5) == ((j-10) % 5) or abs(i - j) == 1:
g.add_edge(nodes[i], nodes[j], 1.0)
# Weak inter-group connection
g.add_edge(nodes[9], nodes[10], 0.1)
# First level: detect major communities (returns list of lists)
major_communities = pg.community.louvain(g)
print("Hierarchical community structure:")
for major_id, members in enumerate(major_communities):
print(f"\nMajor community {major_id} ({len(members)} nodes):")
# Create subgraph and detect sub-communities
if len(members) > 1:
subg = g.subgraph(members)
if subg.node_count() > 1:
sub_communities = pg.community.louvain(subg)
for sub_id, sub_members in enumerate(sub_communities):
print(f" Sub-community {sub_id}: {sorted(sub_members)}")
Example 8: Community Quality Metrics¶
import pygraphina as pg
# Create a graph
g = pg.core.erdos_renyi(50, 0.1, seed=42)
# Detect communities (returns list of lists)
communities = pg.community.louvain(g)
print("Community Quality Metrics:")
print("=" * 50)
for comm_id, members in enumerate(communities):
if len(members) == 0:
continue
# Create subgraph for this community
subg = g.subgraph(members)
# Internal edges
internal_edges = subg.edge_count()
# Possible internal edges
n = len(members)
possible_edges = n * (n - 1) // 2
# Internal density
density = internal_edges / possible_edges if possible_edges > 0 else 0
# Average clustering
clustering = subg.average_clustering() if subg.node_count() > 0 else 0
print(f"\nCommunity {comm_id}:")
print(f" Size: {len(members)} nodes")
print(f" Internal edges: {internal_edges}")
print(f" Density: {density:.3f}")
print(f" Avg clustering: {clustering:.3f}")