Interview Question: "How would you find the shortest path between two nodes in SQL? Explain the MATCH clause and path functions. Compare recursive CTEs vs MATCH for graph traversal." β Asked at LinkedIn, Twitter, Meta for Graph Database roles
βΉοΈ
Difficulty: Advanced | Companies: LinkedIn, Twitter, Meta, Pinterest, Amazon Neptune | Time: 60-75 minutes
Graph Data Model in SQL
-- Create graph tables (node and edge tables)
CREATE TABLE graph_nodes (
node_id INT PRIMARY KEY,
node_type VARCHAR(50),
label VARCHAR(100),
properties JSONB
);
CREATE TABLE graph_edges (
edge_id SERIAL PRIMARY KEY,
source_node INT REFERENCES graph_nodes(node_id),
target_node INT REFERENCES graph_nodes(node_id),
edge_type VARCHAR(50),
weight DECIMAL(10,2),
properties JSONB
);
-- Insert social network data
INSERT INTO graph_nodes VALUES
(1, 'person', 'Alice', '{"age": 30, "city": "SF"}'),
(2, 'person', 'Bob', '{"age": 25, "city": "NYC"}'),
(3, 'person', 'Charlie', '{"age": 35, "city": "SF"}'),
(4, 'person', 'Diana', '{"age": 28, "city": "LA"}'),
(5, 'person', 'Eve', '{"age": 32, "city": "NYC"}'),
(6, 'company', 'TechCorp', '{"industry": "tech"}'),
(7, 'company', 'FinanceInc', '{"industry": "finance"}'),
(8, 'skill', 'SQL', '{"level": "advanced"}'),
(9, 'skill', 'Python', '{"level": "intermediate"}'),
(10, 'skill', 'Java', '{"level": "beginner"}');
INSERT INTO graph_edges (source_node, target_node, edge_type, weight, properties) VALUES
(1, 2, 'knows', 0.8, '{"since": "2020"}'),
(1, 3, 'knows', 0.9, '{"since": "2019"}'),
(2, 4, 'knows', 0.7, '{"since": "2021"}'),
(3, 5, 'knows', 0.85, '{"since": "2020"}'),
(4, 5, 'knows', 0.6, '{"since": "2022"}'),
(1, 6, 'works_at', NULL, '{"role": "engineer"}'),
(2, 7, 'works_at', NULL, '{"role": "analyst"}'),
(3, 6, 'works_at', NULL, '{"role": "manager"}'),
(1, 8, 'has_skill', 0.9, NULL),
(1, 9, 'has_skill', 0.7, NULL),
(2, 8, 'has_skill', 0.8, NULL),
(3, 9, 'has_skill', 0.85, NULL),
(4, 10, 'has_skill', 0.6, NULL),
(5, 8, 'has_skill', 0.75, NULL),
(5, 9, 'has_skill', 0.8, NULL);
Recursive CTE Graph Traversal
-- Find all paths from Alice to Eve
WITH RECURSIVE graph_paths AS (
-- Anchor: Start from Alice
SELECT
node_id,
label,
1 AS depth,
ARRAY[node_id] AS path,
label::TEXT AS path_text,
0.0 AS total_weight
FROM graph_nodes
WHERE label = 'Alice'
UNION ALL
-- Recursive: Traverse edges
SELECT
n.node_id,
n.label,
p.depth + 1,
p.path || n.node_id,
p.path_text || ' β ' || n.label,
p.total_weight + COALESCE(e.weight, 0)
FROM graph_paths p
INNER JOIN graph_edges e ON p.node_id = e.source_node
INNER JOIN graph_nodes n ON e.target_node = n.node_id
WHERE n.node_id != ALL(p.path) -- No cycles
AND p.depth < 10 -- Max depth
)
SELECT
path_text,
depth,
total_weight,
path
FROM graph_paths
WHERE label = 'Eve'
ORDER BY depth, total_weight;
Output:
| path_text | depth | total_weight | path |
|---|---|---|---|
| Alice β Bob β Diana β Eve | 4 | 2.10 | {1,2,4,5} |
| Alice β Charlie β Eve | 3 | 1.75 | {1,3,5} |
| Alice β Bob β Diana β Eve | 4 | 2.10 | {1,2,4,5} |
Shortest Path with Dijkstra
-- Dijkstra's algorithm using recursive CTE
WITH RECURSIVE dijkstra AS (
-- Initialize: source node has distance 0
SELECT
node_id,
label,
0 AS distance,
ARRAY[node_id] AS path,
label::TEXT AS path_text
FROM graph_nodes
WHERE label = 'Alice'
UNION ALL
-- Relax edges
SELECT
n.node_id,
n.label,
d.distance + COALESCE(e.weight, 1),
d.path || n.node_id,
d.path_text || ' β ' || n.label
FROM dijkstra d
INNER JOIN graph_edges e ON d.node_id = e.source_node
INNER JOIN graph_nodes n ON e.target_node = n.node_id
WHERE n.node_id != ALL(d.path)
AND d.distance + COALESCE(e.weight, 1) < (
SELECT MIN(distance) FROM dijkstra WHERE node_id = n.node_id
)
)
SELECT DISTINCT ON (node_id)
node_id,
label,
distance,
path_text
FROM dijkstra
WHERE label = 'Eve'
ORDER BY node_id, distance;
Bidirectional Search
-- Bidirectional BFS from both source and target
WITH RECURSIVE
forward_search AS (
SELECT
node_id,
1 AS depth,
ARRAY[node_id] AS path
FROM graph_nodes WHERE label = 'Alice'
UNION ALL
SELECT
n.node_id,
f.depth + 1,
f.path || n.node_id
FROM forward_search f
JOIN graph_edges e ON f.node_id = e.source_node
JOIN graph_nodes n ON e.target_node = n.node_id
WHERE n.node_id != ALL(f.path) AND f.depth < 5
),
backward_search AS (
SELECT
node_id,
1 AS depth,
ARRAY[node_id] AS path
FROM graph_nodes WHERE label = 'Eve'
UNION ALL
SELECT
n.node_id,
b.depth + 1,
b.path || n.node_id
FROM backward_search b
JOIN graph_edges e ON b.node_id = e.target_node
JOIN graph_nodes n ON e.source_node = n.node_id
WHERE n.node_id != ALL(b.path) AND b.depth < 5
)
SELECT
f.path || b.path[2:] AS full_path,
f.depth + b.depth - 1 AS total_depth
FROM forward_search f
JOIN backward_search b ON f.node_id = b.node_id
WHERE f.node_id NOT IN (SELECT node_id FROM graph_nodes WHERE label IN ('Alice', 'Eve'))
ORDER BY total_depth
LIMIT 5;
Graph Centrality Measures
-- Degree centrality
SELECT
n.node_id,
n.label,
COUNT(DISTINCT e1.edge_id) AS out_degree,
COUNT(DISTINCT e2.edge_id) AS in_degree,
COUNT(DISTINCT e1.edge_id) + COUNT(DISTINCT e2.edge_id) AS total_degree
FROM graph_nodes n
LEFT JOIN graph_edges e1 ON n.node_id = e1.source_node
LEFT JOIN graph_edges e2 ON n.node_id = e2.target_node
GROUP BY n.node_id, n.label
ORDER BY total_degree DESC;
-- Betweenness centrality (simplified)
WITH all_shortest_paths AS (
SELECT
source_node,
target_node,
path,
path_text
FROM graph_paths
WHERE source_node != target_node
)
SELECT
n.node_id,
n.label,
COUNT(*) FILTER (WHERE n.node_id = ANY(
SELECT unnest(path[2:array_length(path, 1)-1])
)) AS betweenness_count
FROM graph_nodes n
CROSS JOIN all_shortest_paths p
GROUP BY n.node_id, n.label
ORDER BY betweenness_count DESC;
MATCH Clause (ISO SQL/PGQ)
-- PostgreSQL 14+ graph query syntax (experimental)
-- CREATE PROPERTY GRAPH for graph queries
-- Simulated MATCH with recursive CTE
WITH RECURSIVE match_query AS (
-- Pattern: (a:person)-[:knows]->(b:person)
SELECT
a.node_id AS a_id,
a.label AS a_name,
b.node_id AS b_id,
b.label AS b_name,
e.weight
FROM graph_nodes a
JOIN graph_edges e ON a.node_id = e.source_node
JOIN graph_nodes b ON e.target_node = b.node_id
WHERE a.node_type = 'person'
AND b.node_type = 'person'
AND e.edge_type = 'knows'
)
SELECT * FROM match_query;
Graph Pattern Matching
-- Find triangles (3-clique)
SELECT
a.label AS person_a,
b.label AS person_b,
c.label AS person_c
FROM graph_nodes a
JOIN graph_edges e1 ON a.node_id = e1.source_node
JOIN graph_nodes b ON e1.target_node = b.node_id
JOIN graph_edges e2 ON b.node_id = e2.source_node
JOIN graph_nodes c ON e2.target_node = c.node_id
JOIN graph_edges e3 ON c.node_id = e3.source_node
WHERE e3.target_node = a.node_id
AND a.node_id < b.node_id
AND b.node_id < c.node_id;
-- Find 2-hop neighbors
WITH RECURSIVE two_hop AS (
SELECT
n1.node_id AS source,
n2.node_id AS neighbor,
1 AS hops
FROM graph_nodes n1
JOIN graph_edges e ON n1.node_id = e.source_node
JOIN graph_nodes n2 ON e.target_node = n2.node_id
UNION
SELECT
t.source,
n3.node_id,
2
FROM two_hop t
JOIN graph_edges e ON t.neighbor = e.source_node
JOIN graph_nodes n3 ON e.target_node = n3.node_id
WHERE n3.node_id != t.source
)
SELECT DISTINCT
s.label AS source,
n.label AS two_hop_neighbor
FROM two_hop t
JOIN graph_nodes s ON t.source = s.node_id
JOIN graph_nodes n ON t.neighbor = n.node_id
WHERE t.hops = 2
ORDER BY s.label, n.label;
Graph Aggregation
-- Community detection (label propagation simplified)
WITH RECURSIVE communities AS (
SELECT
node_id,
node_id AS community_id,
1 AS iteration
FROM graph_nodes
UNION ALL
SELECT
c.node_id,
mode() WITHIN GROUP (ORDER BY c2.community_id),
c.iteration + 1
FROM communities c
JOIN graph_edges e ON c.node_id = e.source_node
JOIN communities c2 ON e.target_node = c2.node_id
WHERE c.iteration < 10
GROUP BY c.node_id, c.iteration
HAVING mode() WITHIN GROUP (ORDER BY c2.community_id) != c.community_id
)
SELECT DISTINCT
n.label,
c.community_id
FROM communities c
JOIN graph_nodes n ON c.node_id = n.node_id
WHERE c.iteration = (SELECT MAX(iteration) FROM communities)
ORDER BY c.community_id, n.label;
Mathematical Graph Theory
Graph where:
- nodes
- edges
Adjacency matrix :
Degree centrality:
Betweenness centrality:
βΉοΈ
Graph Algorithms: For production graph workloads, consider dedicated graph databases like Neo4j, Amazon Neptune, or Apache AGE extension for PostgreSQL.
Performance Considerations
| Operation | Time Complexity | Space Complexity |
|---|---|---|
| BFS | ||
| DFS | ||
| Dijkstra | ||
| Betweenness |
β οΈ
SQL Limitations: SQL graph queries are less efficient than native graph databases for deep traversals. Use recursive CTEs for shallow graphs (< 10 levels).