PIVOT & UNPIVOT Patterns
Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber
Native PIVOT Syntax
-- SQL Server / PostgreSQL 14+ PIVOT syntax
SELECT *
FROM sales_data
PIVOT (
SUM(amount)
FOR product_category IN (
'Electronics' AS electronics,
'Clothing' AS clothing,
'Food' AS food,
'Books' AS books
)
) AS pivot_table;
βΉοΈ
Key Insight: The PIVOT operator requires an aggregate function. You can't use PIVOT without aggregating. For unpivoting, use UNPIVOT or CROSS JOIN with unnest functions.
Manual PIVOT with CASE
-- Cross-tabulation using conditional aggregation
SELECT
department,
SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 1 THEN amount END) AS q1,
SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 2 THEN amount END) AS q2,
SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 3 THEN amount END) AS q3,
SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 4 THEN amount END) AS q4,
SUM(amount) AS total
FROM sales
WHERE EXTRACT(YEAR FROM sale_date) = 2024
GROUP BY department
ORDER BY total DESC;
Dynamic PIVOT with Unknown Columns
-- PostgreSQL: Dynamic pivot using string interpolation
DO $$
DECLARE
pivot_sql TEXT;
col_list TEXT;
BEGIN
-- Build column list dynamically
SELECT STRING_AGG(
DISTINCT '''' || product_category || ''' AS ' ||
REPLACE(product_category, ' ', '_'),
', '
)
INTO col_list
FROM sales_data;
-- Construct and execute pivot query
pivot_sql := format('
SELECT
department,
%s,
SUM(amount) AS total
FROM sales_data
GROUP BY department
ORDER BY total DESC
', col_list);
RAISE NOTICE '%', pivot_sql;
END $$;
BigQuery PIVOT
-- BigQuery native PIVOT syntax
SELECT *
FROM (
SELECT
department,
product_category,
amount
FROM `project.dataset.sales`
)
PIVOT (
SUM(amount)
FOR product_category IN (
'Electronics' AS electronics,
'Clothing' AS clothing,
'Food' AS food
)
);
UNPIVOT with CROSS JOIN
-- Convert columns to rows (UNPIVOT pattern)
SELECT
employee_id,
quarter,
amount
FROM quarterly_sales
CROSS JOIN LATERAL (
VALUES
('Q1', q1_amount),
('Q2', q2_amount),
('Q3', q3_amount),
('Q4', q4_amount)
) AS t(quarter, amount)
WHERE amount IS NOT NULL;
β οΈ
Common Pitfall: CROSS JOIN LATERAL with VALUES creates rows only for non-null values. If you need to preserve nulls, use UNION ALL with explicit SELECT statements instead.
PostgreSQL UNPIVOT with unnest
-- Using unnest with arrays for UNPIVOT
SELECT
employee_id,
quarter,
amount
FROM quarterly_sales
CROSS JOIN unnest(
ARRAY['Q1', 'Q2', 'Q3', 'Q4'],
ARRAY[q1_amount, q2_amount, q3_amount, q4_amount]
) AS t(quarter, amount);
Matrix-style Reporting
-- Daily active users pivot for reporting
WITH daily_stats AS (
SELECT
DATE(event_time) AS event_date,
platform,
COUNT(DISTINCT user_id) AS dau
FROM events
WHERE event_time >= CURRENT_DATE - INTERVAL '30' DAY
GROUP BY 1, 2
)
SELECT
event_date,
SUM(CASE WHEN platform = 'ios' THEN dau END) AS ios_dau,
SUM(CASE WHEN platform = 'android' THEN dau END) AS android_dau,
SUM(CASE WHEN platform = 'web' THEN dau END) AS web_dau,
SUM(dau) AS total_dau,
ROUND(
SUM(CASE WHEN platform = 'ios' THEN dau END) * 100.0 /
NULLIF(SUM(dau), 0), 1
) AS ios_pct
FROM daily_stats
GROUP BY event_date
ORDER BY event_date;
Multi-column PIVOT
-- Pivot multiple measures simultaneously
SELECT
department,
-- Pivoted salary data
SUM(CASE WHEN metric = 'avg_salary' THEN value END) AS avg_salary,
SUM(CASE WHEN metric = 'min_salary' THEN value END) AS min_salary,
SUM(CASE WHEN metric = 'max_salary' THEN value END) AS max_salary,
-- Pivoted headcount data
SUM(CASE WHEN metric = 'headcount' THEN value END) AS headcount,
SUM(CASE WHEN metric = 'new_hires' THEN value END) AS new_hires
FROM (
SELECT
department,
'avg_salary' AS metric,
AVG(salary) AS value
FROM employees GROUP BY department
UNION ALL
SELECT
department,
'min_salary' AS metric,
MIN(salary) AS value
FROM employees GROUP BY department
UNION ALL
SELECT
department,
'max_salary' AS metric,
MAX(salary) AS value
FROM employees GROUP BY department
UNION ALL
SELECT
department,
'headcount' AS metric,
COUNT(*)::NUMERIC AS value
FROM employees GROUP BY department
UNION ALL
SELECT
department,
'new_hires' AS metric,
COUNT(*)::NUMERIC AS value
FROM employees
WHERE hire_date >= CURRENT_DATE - INTERVAL '90' DAY
GROUP BY department
) sub
GROUP BY department;
Recursive PIVOT for Variable Depth
-- Pivot with recursive depth expansion
WITH RECURSIVE categories AS (
SELECT
category_id,
category_name,
parent_id,
1 AS depth
FROM product_categories
WHERE parent_id IS NULL
UNION ALL
SELECT
pc.category_id,
pc.category_name,
pc.parent_id,
c.depth + 1
FROM product_categories pc
INNER JOIN categories c ON pc.parent_id = c.category_id
WHERE c.depth < 5
)
SELECT
parent_id,
SUM(CASE WHEN depth = 1 THEN 1 ELSE 0 END) AS level_1_count,
SUM(CASE WHEN depth = 2 THEN 1 ELSE 0 END) AS level_2_count,
SUM(CASE WHEN depth = 3 THEN 1 ELSE 0 END) AS level_3_count,
SUM(CASE WHEN depth = 4 THEN 1 ELSE 0 END) AS level_4_count
FROM categories
GROUP BY parent_id;
PIVOT with Percentage Calculation
-- Pivot with row-wise percentage calculation
WITH pivoted AS (
SELECT
department,
SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) AS active,
SUM(CASE WHEN status = 'inactive' THEN 1 ELSE 0 END) AS inactive,
SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) AS pending,
COUNT(*) AS total
FROM employees
GROUP BY department
)
SELECT
department,
active,
inactive,
pending,
total,
ROUND(active * 100.0 / NULLIF(total, 0), 1) AS active_pct,
ROUND(inactive * 100.0 / NULLIF(total, 0), 1) AS inactive_pct,
ROUND(pending * 100.0 / NULLIF(total, 0), 1) AS pending_pct
FROM pivoted
ORDER BY total DESC;
Conditional PIVOT with WHERE
-- PIVOT with filtering before aggregation
SELECT
region,
SUM(CASE WHEN EXTRACT(MONTH FROM order_date) = 1 THEN revenue END) AS jan,
SUM(CASE WHEN EXTRACT(MONTH FROM order_date) = 2 THEN revenue END) AS feb,
SUM(CASE WHEN EXTRACT(MONTH FROM order_date) = 3 THEN revenue END) AS mar
FROM orders
WHERE order_status = 'completed'
AND order_date >= '2024-01-01'
AND order_date < '2024-04-01'
GROUP BY region
ORDER BY SUM(revenue) DESC;
Follow-Up Questions
- How would you pivot data when the number of columns is unknown at query time?
- What's the performance impact of using CASE-based pivoting vs native PIVOT?
- How do you handle NULL values in UNPIVOT operations?
- Explain how to pivot multiple measures without using UNION ALL.
- What's the best approach for pivoting data in a columnar database like BigQuery?
- How would you create a running total within each pivoted column?