πŸŽ‰ 75% of content is free forever β€” Unlock Premium from $10/mo β†’
CW
Search courses…
πŸ’Ό Servicesℹ️ Aboutβœ‰οΈ ContactView Pricing Plansfrom $10

Recursive CTEs & Hierarchical Data

Advanced SQLCommon Table Expressions⭐ Premium

Advertisement

Recursive CTEs & Hierarchical Data

Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber

Recursive CTE Syntax

-- Basic recursive CTE structure
WITH RECURSIVE org_chart AS (
  -- Anchor member: starting point
  SELECT
    employee_id,
    name,
    manager_id,
    1 AS level,
    name::TEXT AS path
  FROM employees
  WHERE manager_id IS NULL

  UNION ALL

  -- Recursive member: joins with previous result
  SELECT
    e.employee_id,
    e.name,
    e.manager_id,
    oc.level + 1,
    oc.path || ' β†’ ' || e.name
  FROM employees e
  INNER JOIN org_chart oc ON e.manager_id = oc.employee_id
)
SELECT * FROM org_chart ORDER BY path;

ℹ️

Key Insight: The anchor member must return at least one row for recursion to start. The recursive member must reference the CTE name. PostgreSQL requires WITH RECURSIVE, while BigQuery uses just WITH.

Organizational Hierarchy Traversal

-- Full hierarchy with depth calculation
WITH RECURSIVE hierarchy AS (
  SELECT
    employee_id,
    name,
    manager_id,
    0 AS depth,
    ARRAY[employee_id] AS path_ids,
    name::TEXT AS full_path,
    salary AS root_salary
  FROM employees
  WHERE manager_id IS NULL

  UNION ALL

  SELECT
    e.employee_id,
    e.name,
    e.manager_id,
    h.depth + 1,
    h.path_ids || e.employee_id,
    h.full_path || ' β†’ ' || e.name,
    h.root_salary
  FROM employees e
  INNER JOIN hierarchy h ON e.manager_id = h.employee_id
  WHERE h.depth < 10  -- Prevent infinite recursion
)
SELECT
  employee_id,
  name,
  depth,
  full_path,
  root_salary,
  salary,
  ROUND(salary / root_salary * 100, 1) AS pct_of_root
FROM hierarchy
ORDER BY full_path;

Graph Traversal with Cycle Detection

-- Find all paths between two nodes (with cycle prevention)
WITH RECURSIVE path_finder AS (
  SELECT
    source_node,
    target_node,
    weight,
    ARRAY[source_node, target_node] AS path,
    weight AS total_weight,
    1 AS hops
  FROM edges
  WHERE source_node = 'A'

  UNION ALL

  SELECT
    pf.source_node,
    e.target_node,
    e.weight,
    pf.path || e.target_node,
    pf.total_weight + e.weight,
    pf.hops + 1
  FROM path_finder pf
  INNER JOIN edges e ON pf.target_node = e.source_node
  WHERE e.target_node != ALL(pf.path)  -- Cycle detection
    AND pf.hops < 20  -- Max depth limit
)
SELECT * FROM path_finder
WHERE target_node = 'Z'
ORDER BY total_weight;

⚠️

Performance Warning: Always include a depth limit in recursive CTEs to prevent infinite loops. In cyclic graphs, the WHERE e.target_node != ALL(pf.path) clause is essential for cycle detection.

Materialized Path Pattern

-- Store and query hierarchical paths
CREATE TABLE categories (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100),
  parent_id INT REFERENCES categories(id),
  path TEXT,  -- Materialized path: '/1/5/12/'
  depth INT
);

-- Rebuild materialized paths
WITH RECURSIVE cat_path AS (
  SELECT
    id,
    name,
    parent_id,
    '/' || id::TEXT || '/' AS path,
    0 AS depth
  FROM categories
  WHERE parent_id IS NULL

  UNION ALL

  SELECT
    c.id,
    c.name,
    c.parent_id,
    cp.path || c.id::TEXT || '/',
    cp.depth + 1
  FROM categories c
  INNER JOIN cat_path cp ON c.parent_id = cp.id
)
UPDATE categories
SET path = cp.path, depth = cp.depth
FROM cat_path cp
WHERE categories.id = cp.id;

-- Query: Find all descendants of a category
WITH RECURSIVE descendants AS (
  SELECT id, name, parent_id, path, depth
  FROM categories
  WHERE id = 5  -- Starting category

  UNION ALL

  SELECT c.id, c.name, c.parent_id, c.path, c.depth
  FROM categories c
  INNER JOIN descendants d ON c.parent_id = d.id
)
SELECT * FROM descendants ORDER BY path;

Adjacency List vs Materialized Path

-- Convert adjacency list to materialized path
WITH RECURSIVE path_builder AS (
  SELECT
    id,
    name,
    parent_id,
    '/' || id::TEXT || '/' AS path,
    name::TEXT AS full_name,
    1 AS depth
  FROM categories
  WHERE parent_id IS NULL

  UNION ALL

  SELECT
    c.id,
    c.name,
    c.parent_id,
    pb.path || c.id::TEXT || '/',
    pb.full_name || ' > ' || c.name,
    pb.depth + 1
  FROM categories c
  INNER JOIN path_builder pb ON c.parent_id = pb.id
)
SELECT
  id,
  name,
  path,
  full_name,
  depth,
  REPEAT('  ', depth) || name AS indented_name
FROM path_builder
ORDER BY path;

Recursive Aggregation

-- Sum up costs through a dependency chain
WITH RECURSIVE cost_chain AS (
  SELECT
    project_id,
    task_id,
    task_name,
    direct_cost,
    direct_cost AS cumulative_cost,
    ARRAY[task_id] AS task_chain
  FROM project_tasks
  WHERE parent_task_id IS NULL

  UNION ALL

  SELECT
    pt.project_id,
    pt.task_id,
    pt.task_name,
    pt.direct_cost,
    cc.cumulative_cost + pt.direct_cost,
    cc.task_chain || pt.task_id
  FROM project_tasks pt
  INNER JOIN cost_chain cc ON pt.parent_task_id = cc.task_id
)
SELECT
  project_id,
  task_id,
  task_name,
  direct_cost,
  cumulative_cost,
  task_chain
FROM cost_chain
ORDER BY task_chain;

Breadth-First Search with Recursive CTE

-- BFS traversal with level tracking
WITH RECURSIVE bfs AS (
  SELECT
    node_id,
    0 AS level,
    ARRAY[node_id] AS visited,
    node_id::TEXT AS path
  FROM graph_nodes
  WHERE node_id = 'start_node'

  UNION

  SELECT
    gn.node_id,
    bfs.level + 1,
    bfs.visited || gn.node_id,
    bfs.path || ' β†’ ' || gn.node_id
  FROM graph_nodes gn
  INNER JOIN graph_edges ge ON gn.node_id = ge.target_node
  INNER JOIN bfs ON ge.source_node = bfs.node_id
  WHERE gn.node_id != ALL(bfs.visited)
)
SELECT
  node_id,
  level,
  path
FROM bfs
ORDER BY level, node_id;

ℹ️

UNION vs UNION ALL: Use UNION (not UNION ALL) in BFS to automatically deduplicate nodes at the same level. This prevents processing the same node multiple times.

Recursive Date Generation

-- Generate a date series recursively
WITH RECURSIVE date_series AS (
  SELECT
    '2024-01-01'::DATE AS date,
    1 AS day_number

  UNION ALL

  SELECT
    date + INTERVAL '1' DAY,
    day_number + 1
  FROM date_series
  WHERE date < '2024-12-31'::DATE
)
SELECT
  date,
  day_number,
  EXTRACT(DOW FROM date) AS day_of_week,
  EXTRACT(MONTH FROM date) AS month
FROM date_series;

Tree Flattening with Ordering

-- Flatten a tree with proper DFS ordering
WITH RECURSIVE tree_dfs AS (
  SELECT
    id,
    name,
    parent_id,
    0 AS depth,
    LPAD(id::TEXT, 10, '0') AS sort_key
  FROM tree_nodes
  WHERE parent_id IS NULL

  UNION ALL

  SELECT
    tn.id,
    tn.name,
    tn.parent_id,
    td.depth + 1,
    LPAD(td.sort_key || '/' || LPAD(tn.id::TEXT, 5, '0'), 20, '0')
  FROM tree_nodes tn
  INNER JOIN tree_dfs td ON tn.parent_id = td.id
)
SELECT
  REPEAT('  ', depth) || name AS display_name,
  depth,
  sort_key
FROM tree_dfs
ORDER BY sort_key;

Follow-Up Questions

  1. How do you prevent infinite recursion in cyclic graphs?
  2. What's the performance difference between recursive CTEs and iterative approaches?
  3. How would you find the shortest path in an unweighted graph using recursive CTEs?
  4. Explain the difference between UNION and UNION ALL in recursive CTEs.
  5. How do you limit recursion depth in PostgreSQL vs BigQuery?
  6. When should you use a materialized path pattern vs an adjacency list?

Advertisement