PIVOT & UNPIVOT Patterns

Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber

Native PIVOT Syntax

-- SQL Server / PostgreSQL 14+ PIVOT syntax
SELECT *
FROM sales_data
PIVOT (
  SUM(amount)
  FOR product_category IN (
    'Electronics' AS electronics,
    'Clothing' AS clothing,
    'Food' AS food,
    'Books' AS books
  )
) AS pivot_table;

ℹ️

Key Insight: The PIVOT operator requires an aggregate function. You can't use PIVOT without aggregating. For unpivoting, use UNPIVOT or CROSS JOIN with unnest functions.

Manual PIVOT with CASE

-- Cross-tabulation using conditional aggregation
SELECT
  department,
  SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 1 THEN amount END) AS q1,
  SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 2 THEN amount END) AS q2,
  SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 3 THEN amount END) AS q3,
  SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 4 THEN amount END) AS q4,
  SUM(amount) AS total
FROM sales
WHERE EXTRACT(YEAR FROM sale_date) = 2024
GROUP BY department
ORDER BY total DESC;

Dynamic PIVOT with Unknown Columns

-- PostgreSQL: Dynamic pivot using string interpolation
DO $$
DECLARE
  pivot_sql TEXT;
  col_list TEXT;
BEGIN
  -- Build column list dynamically
  SELECT STRING_AGG(
    DISTINCT '''' || product_category || ''' AS ' ||
    REPLACE(product_category, ' ', '_'),
    ', '
  )
  INTO col_list
  FROM sales_data;

  -- Construct and execute pivot query
  pivot_sql := format('
    SELECT
      department,
      %s,
      SUM(amount) AS total
    FROM sales_data
    GROUP BY department
    ORDER BY total DESC
  ', col_list);

  RAISE NOTICE '%', pivot_sql;
END $$;

BigQuery PIVOT

-- BigQuery native PIVOT syntax
SELECT *
FROM (
  SELECT
    department,
    product_category,
    amount
  FROM `project.dataset.sales`
)
PIVOT (
  SUM(amount)
  FOR product_category IN (
    'Electronics' AS electronics,
    'Clothing' AS clothing,
    'Food' AS food
  )
);

UNPIVOT with CROSS JOIN

-- Convert columns to rows (UNPIVOT pattern)
SELECT
  employee_id,
  quarter,
  amount
FROM quarterly_sales
CROSS JOIN LATERAL (
  VALUES
    ('Q1', q1_amount),
    ('Q2', q2_amount),
    ('Q3', q3_amount),
    ('Q4', q4_amount)
) AS t(quarter, amount)
WHERE amount IS NOT NULL;

⚠️

Common Pitfall: CROSS JOIN LATERAL with VALUES creates rows only for non-null values. If you need to preserve nulls, use UNION ALL with explicit SELECT statements instead.

PostgreSQL UNPIVOT with unnest

-- Using unnest with arrays for UNPIVOT
SELECT
  employee_id,
  quarter,
  amount
FROM quarterly_sales
CROSS JOIN unnest(
  ARRAY['Q1', 'Q2', 'Q3', 'Q4'],
  ARRAY[q1_amount, q2_amount, q3_amount, q4_amount]
) AS t(quarter, amount);

Matrix-style Reporting

-- Daily active users pivot for reporting
WITH daily_stats AS (
  SELECT
    DATE(event_time) AS event_date,
    platform,
    COUNT(DISTINCT user_id) AS dau
  FROM events
  WHERE event_time >= CURRENT_DATE - INTERVAL '30' DAY
  GROUP BY 1, 2
)
SELECT
  event_date,
  SUM(CASE WHEN platform = 'ios' THEN dau END) AS ios_dau,
  SUM(CASE WHEN platform = 'android' THEN dau END) AS android_dau,
  SUM(CASE WHEN platform = 'web' THEN dau END) AS web_dau,
  SUM(dau) AS total_dau,
  ROUND(
    SUM(CASE WHEN platform = 'ios' THEN dau END) * 100.0 /
    NULLIF(SUM(dau), 0), 1
  ) AS ios_pct
FROM daily_stats
GROUP BY event_date
ORDER BY event_date;

Multi-column PIVOT

-- Pivot multiple measures simultaneously
SELECT
  department,
  -- Pivoted salary data
  SUM(CASE WHEN metric = 'avg_salary' THEN value END) AS avg_salary,
  SUM(CASE WHEN metric = 'min_salary' THEN value END) AS min_salary,
  SUM(CASE WHEN metric = 'max_salary' THEN value END) AS max_salary,
  -- Pivoted headcount data
  SUM(CASE WHEN metric = 'headcount' THEN value END) AS headcount,
  SUM(CASE WHEN metric = 'new_hires' THEN value END) AS new_hires
FROM (
  SELECT
    department,
    'avg_salary' AS metric,
    AVG(salary) AS value
  FROM employees GROUP BY department

  UNION ALL

  SELECT
    department,
    'min_salary' AS metric,
    MIN(salary) AS value
  FROM employees GROUP BY department

  UNION ALL

  SELECT
    department,
    'max_salary' AS metric,
    MAX(salary) AS value
  FROM employees GROUP BY department

  UNION ALL

  SELECT
    department,
    'headcount' AS metric,
    COUNT(*)::NUMERIC AS value
  FROM employees GROUP BY department

  UNION ALL

  SELECT
    department,
    'new_hires' AS metric,
    COUNT(*)::NUMERIC AS value
  FROM employees
  WHERE hire_date >= CURRENT_DATE - INTERVAL '90' DAY
  GROUP BY department
) sub
GROUP BY department;

Recursive PIVOT for Variable Depth

-- Pivot with recursive depth expansion
WITH RECURSIVE categories AS (
  SELECT
    category_id,
    category_name,
    parent_id,
    1 AS depth
  FROM product_categories
  WHERE parent_id IS NULL

  UNION ALL

  SELECT
    pc.category_id,
    pc.category_name,
    pc.parent_id,
    c.depth + 1
  FROM product_categories pc
  INNER JOIN categories c ON pc.parent_id = c.category_id
  WHERE c.depth < 5
)
SELECT
  parent_id,
  SUM(CASE WHEN depth = 1 THEN 1 ELSE 0 END) AS level_1_count,
  SUM(CASE WHEN depth = 2 THEN 1 ELSE 0 END) AS level_2_count,
  SUM(CASE WHEN depth = 3 THEN 1 ELSE 0 END) AS level_3_count,
  SUM(CASE WHEN depth = 4 THEN 1 ELSE 0 END) AS level_4_count
FROM categories
GROUP BY parent_id;

PIVOT with Percentage Calculation

-- Pivot with row-wise percentage calculation
WITH pivoted AS (
  SELECT
    department,
    SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) AS active,
    SUM(CASE WHEN status = 'inactive' THEN 1 ELSE 0 END) AS inactive,
    SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) AS pending,
    COUNT(*) AS total
  FROM employees
  GROUP BY department
)
SELECT
  department,
  active,
  inactive,
  pending,
  total,
  ROUND(active * 100.0 / NULLIF(total, 0), 1) AS active_pct,
  ROUND(inactive * 100.0 / NULLIF(total, 0), 1) AS inactive_pct,
  ROUND(pending * 100.0 / NULLIF(total, 0), 1) AS pending_pct
FROM pivoted
ORDER BY total DESC;

Conditional PIVOT with WHERE

-- PIVOT with filtering before aggregation
SELECT
  region,
  SUM(CASE WHEN EXTRACT(MONTH FROM order_date) = 1 THEN revenue END) AS jan,
  SUM(CASE WHEN EXTRACT(MONTH FROM order_date) = 2 THEN revenue END) AS feb,
  SUM(CASE WHEN EXTRACT(MONTH FROM order_date) = 3 THEN revenue END) AS mar
FROM orders
WHERE order_status = 'completed'
  AND order_date >= '2024-01-01'
  AND order_date < '2024-04-01'
GROUP BY region
ORDER BY SUM(revenue) DESC;

Follow-Up Questions

How would you pivot data when the number of columns is unknown at query time?
What's the performance impact of using CASE-based pivoting vs native PIVOT?
How do you handle NULL values in UNPIVOT operations?
Explain how to pivot multiple measures without using UNION ALL.
What's the best approach for pivoting data in a columnar database like BigQuery?
How would you create a running total within each pivoted column?