Recursive CTEs & Hierarchical Data
Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber
Recursive CTE Syntax
-- Basic recursive CTE structure
WITH RECURSIVE org_chart AS (
-- Anchor member: starting point
SELECT
employee_id,
name,
manager_id,
1 AS level,
name::TEXT AS path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive member: joins with previous result
SELECT
e.employee_id,
e.name,
e.manager_id,
oc.level + 1,
oc.path || ' β ' || e.name
FROM employees e
INNER JOIN org_chart oc ON e.manager_id = oc.employee_id
)
SELECT * FROM org_chart ORDER BY path;
βΉοΈ
Key Insight: The anchor member must return at least one row for recursion to start. The recursive member must reference the CTE name. PostgreSQL requires WITH RECURSIVE, while BigQuery uses just WITH.
Organizational Hierarchy Traversal
-- Full hierarchy with depth calculation
WITH RECURSIVE hierarchy AS (
SELECT
employee_id,
name,
manager_id,
0 AS depth,
ARRAY[employee_id] AS path_ids,
name::TEXT AS full_path,
salary AS root_salary
FROM employees
WHERE manager_id IS NULL
UNION ALL
SELECT
e.employee_id,
e.name,
e.manager_id,
h.depth + 1,
h.path_ids || e.employee_id,
h.full_path || ' β ' || e.name,
h.root_salary
FROM employees e
INNER JOIN hierarchy h ON e.manager_id = h.employee_id
WHERE h.depth < 10 -- Prevent infinite recursion
)
SELECT
employee_id,
name,
depth,
full_path,
root_salary,
salary,
ROUND(salary / root_salary * 100, 1) AS pct_of_root
FROM hierarchy
ORDER BY full_path;
Graph Traversal with Cycle Detection
-- Find all paths between two nodes (with cycle prevention)
WITH RECURSIVE path_finder AS (
SELECT
source_node,
target_node,
weight,
ARRAY[source_node, target_node] AS path,
weight AS total_weight,
1 AS hops
FROM edges
WHERE source_node = 'A'
UNION ALL
SELECT
pf.source_node,
e.target_node,
e.weight,
pf.path || e.target_node,
pf.total_weight + e.weight,
pf.hops + 1
FROM path_finder pf
INNER JOIN edges e ON pf.target_node = e.source_node
WHERE e.target_node != ALL(pf.path) -- Cycle detection
AND pf.hops < 20 -- Max depth limit
)
SELECT * FROM path_finder
WHERE target_node = 'Z'
ORDER BY total_weight;
β οΈ
Performance Warning: Always include a depth limit in recursive CTEs to prevent infinite loops. In cyclic graphs, the WHERE e.target_node != ALL(pf.path) clause is essential for cycle detection.
Materialized Path Pattern
-- Store and query hierarchical paths
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
parent_id INT REFERENCES categories(id),
path TEXT, -- Materialized path: '/1/5/12/'
depth INT
);
-- Rebuild materialized paths
WITH RECURSIVE cat_path AS (
SELECT
id,
name,
parent_id,
'/' || id::TEXT || '/' AS path,
0 AS depth
FROM categories
WHERE parent_id IS NULL
UNION ALL
SELECT
c.id,
c.name,
c.parent_id,
cp.path || c.id::TEXT || '/',
cp.depth + 1
FROM categories c
INNER JOIN cat_path cp ON c.parent_id = cp.id
)
UPDATE categories
SET path = cp.path, depth = cp.depth
FROM cat_path cp
WHERE categories.id = cp.id;
-- Query: Find all descendants of a category
WITH RECURSIVE descendants AS (
SELECT id, name, parent_id, path, depth
FROM categories
WHERE id = 5 -- Starting category
UNION ALL
SELECT c.id, c.name, c.parent_id, c.path, c.depth
FROM categories c
INNER JOIN descendants d ON c.parent_id = d.id
)
SELECT * FROM descendants ORDER BY path;
Adjacency List vs Materialized Path
-- Convert adjacency list to materialized path
WITH RECURSIVE path_builder AS (
SELECT
id,
name,
parent_id,
'/' || id::TEXT || '/' AS path,
name::TEXT AS full_name,
1 AS depth
FROM categories
WHERE parent_id IS NULL
UNION ALL
SELECT
c.id,
c.name,
c.parent_id,
pb.path || c.id::TEXT || '/',
pb.full_name || ' > ' || c.name,
pb.depth + 1
FROM categories c
INNER JOIN path_builder pb ON c.parent_id = pb.id
)
SELECT
id,
name,
path,
full_name,
depth,
REPEAT(' ', depth) || name AS indented_name
FROM path_builder
ORDER BY path;
Recursive Aggregation
-- Sum up costs through a dependency chain
WITH RECURSIVE cost_chain AS (
SELECT
project_id,
task_id,
task_name,
direct_cost,
direct_cost AS cumulative_cost,
ARRAY[task_id] AS task_chain
FROM project_tasks
WHERE parent_task_id IS NULL
UNION ALL
SELECT
pt.project_id,
pt.task_id,
pt.task_name,
pt.direct_cost,
cc.cumulative_cost + pt.direct_cost,
cc.task_chain || pt.task_id
FROM project_tasks pt
INNER JOIN cost_chain cc ON pt.parent_task_id = cc.task_id
)
SELECT
project_id,
task_id,
task_name,
direct_cost,
cumulative_cost,
task_chain
FROM cost_chain
ORDER BY task_chain;
Breadth-First Search with Recursive CTE
-- BFS traversal with level tracking
WITH RECURSIVE bfs AS (
SELECT
node_id,
0 AS level,
ARRAY[node_id] AS visited,
node_id::TEXT AS path
FROM graph_nodes
WHERE node_id = 'start_node'
UNION
SELECT
gn.node_id,
bfs.level + 1,
bfs.visited || gn.node_id,
bfs.path || ' β ' || gn.node_id
FROM graph_nodes gn
INNER JOIN graph_edges ge ON gn.node_id = ge.target_node
INNER JOIN bfs ON ge.source_node = bfs.node_id
WHERE gn.node_id != ALL(bfs.visited)
)
SELECT
node_id,
level,
path
FROM bfs
ORDER BY level, node_id;
βΉοΈ
UNION vs UNION ALL: Use UNION (not UNION ALL) in BFS to automatically deduplicate nodes at the same level. This prevents processing the same node multiple times.
Recursive Date Generation
-- Generate a date series recursively
WITH RECURSIVE date_series AS (
SELECT
'2024-01-01'::DATE AS date,
1 AS day_number
UNION ALL
SELECT
date + INTERVAL '1' DAY,
day_number + 1
FROM date_series
WHERE date < '2024-12-31'::DATE
)
SELECT
date,
day_number,
EXTRACT(DOW FROM date) AS day_of_week,
EXTRACT(MONTH FROM date) AS month
FROM date_series;
Tree Flattening with Ordering
-- Flatten a tree with proper DFS ordering
WITH RECURSIVE tree_dfs AS (
SELECT
id,
name,
parent_id,
0 AS depth,
LPAD(id::TEXT, 10, '0') AS sort_key
FROM tree_nodes
WHERE parent_id IS NULL
UNION ALL
SELECT
tn.id,
tn.name,
tn.parent_id,
td.depth + 1,
LPAD(td.sort_key || '/' || LPAD(tn.id::TEXT, 5, '0'), 20, '0')
FROM tree_nodes tn
INNER JOIN tree_dfs td ON tn.parent_id = td.id
)
SELECT
REPEAT(' ', depth) || name AS display_name,
depth,
sort_key
FROM tree_dfs
ORDER BY sort_key;
Follow-Up Questions
- How do you prevent infinite recursion in cyclic graphs?
- What's the performance difference between recursive CTEs and iterative approaches?
- How would you find the shortest path in an unweighted graph using recursive CTEs?
- Explain the difference between
UNIONandUNION ALLin recursive CTEs. - How do you limit recursion depth in PostgreSQL vs BigQuery?
- When should you use a materialized path pattern vs an adjacency list?