Interview Question: "How do you create a dynamic pivot table when the number of columns is unknown at compile time? Explain the performance implications of pivoting large datasets." — Asked at Tableau, Looker, PowerBI for Analytics Engineer roles
ℹ️
Difficulty: Advanced | Companies: Tableau, Looker, PowerBI, Qlik, Sisense | Time: 45-60 minutes
Static PIVOT with CASE
-- Create sales data
CREATE TABLE monthly_sales (
year INT,
month INT,
region VARCHAR(20),
product VARCHAR(50),
revenue DECIMAL(12,2)
);
INSERT INTO monthly_sales VALUES
(2024, 1, 'North', 'Laptop', 150000),
(2024, 1, 'North', 'Phone', 200000),
(2024, 1, 'South', 'Laptop', 120000),
(2024, 1, 'South', 'Phone', 180000),
(2024, 2, 'North', 'Laptop', 160000),
(2024, 2, 'North', 'Phone', 210000),
(2024, 2, 'South', 'Laptop', 130000),
(2024, 2, 'South', 'Phone', 190000),
(2024, 3, 'North', 'Laptop', 170000),
(2024, 3, 'North', 'Phone', 220000),
(2024, 3, 'South', 'Laptop', 140000),
(2024, 3, 'South', 'Phone', 200000);
-- Static pivot: rows to columns
SELECT
year,
month,
SUM(CASE WHEN region = 'North' THEN revenue ELSE 0 END) AS north_revenue,
SUM(CASE WHEN region = 'South' THEN revenue ELSE 0 END) AS south_revenue,
SUM(CASE WHEN region = 'East' THEN revenue ELSE 0 END) AS east_revenue,
SUM(CASE WHEN region = 'West' THEN revenue ELSE 0 END) AS west_revenue,
SUM(revenue) AS total_revenue
FROM monthly_sales
GROUP BY year, month
ORDER BY year, month;
Output:
| year | month | north_revenue | south_revenue | east_revenue | west_revenue | total_revenue |
|---|---|---|---|---|---|---|
| 2024 | 1 | 350000 | 300000 | 0 | 0 | 650000 |
| 2024 | 2 | 370000 | 320000 | 0 | 0 | 690000 |
| 2024 | 3 | 390000 | 340000 | 0 | 0 | 730000 |
crosstab Function (tablefunc)
-- Enable tablefunc extension
CREATE EXTENSION IF NOT EXISTS tablefunc;
-- Basic crosstab
SELECT * FROM crosstab(
'SELECT year, month, SUM(revenue)::INT
FROM monthly_sales
GROUP BY year, month
ORDER BY year, month',
'SELECT DISTINCT month FROM monthly_sales ORDER BY month'
) AS ct(year INT, jan INT, feb INT, mar INT);
-- crosstab with multiple value columns
SELECT * FROM crosstab(
'SELECT year, region, SUM(revenue)::INT
FROM monthly_sales
GROUP BY year, region
ORDER BY year, region',
'SELECT DISTINCT region FROM monthly_sales ORDER BY region'
) AS ct(year INT, north INT, south INT);
Output:
| year | jan | feb | mar |
|---|---|---|---|
| 2024 | 650000 | 690000 | 730000 |
Dynamic PIVOT with EXECUTE
-- Dynamic pivot for unknown columns
DO $$
DECLARE
sql TEXT;
col_list TEXT;
regions TEXT[];
BEGIN
-- Get distinct regions
SELECT ARRAY_AGG(DISTINCT region ORDER BY region) INTO regions
FROM monthly_sales;
-- Build column list
SELECT STRING_AGG(
format('SUM(CASE WHEN region = %L THEN revenue ELSE 0 END) AS %I',
region, LOWER(region)),
', '
) INTO col_list
FROM unnest(regions) AS region;
-- Build full query
sql := format('
SELECT
year,
month,
%s,
SUM(revenue) AS total_revenue
FROM monthly_sales
GROUP BY year, month
ORDER BY year, month',
col_list
);
-- Execute and return results
RAISE NOTICE '%', sql;
EXECUTE sql;
END $$;
UNPIVOT with UNION ALL
-- Create pivoted table
CREATE TABLE pivoted_sales (
year INT,
north_revenue DECIMAL(12,2),
south_revenue DECIMAL(12,2),
east_revenue DECIMAL(12,2),
west_revenue DECIMAL(12,2)
);
INSERT INTO pivoted_sales VALUES
(2024, 350000, 300000, 0, 0),
(2024, 370000, 320000, 0, 0),
(2024, 390000, 340000, 0, 0);
-- Unpivot with UNION ALL
SELECT year, 'North' AS region, north_revenue AS revenue
FROM pivoted_sales
UNION ALL
SELECT year, 'South' AS region, south_revenue AS revenue
FROM pivoted_sales
UNION ALL
SELECT year, 'East' AS region, east_revenue AS revenue
FROM pivoted_sales
UNION ALL
SELECT year, 'West' AS region, west_revenue AS revenue
FROM pivoted_sales
ORDER BY year, region;
-- Unpivot with LATERAL JOIN (more efficient)
SELECT
p.year,
r.region,
r.revenue
FROM pivoted_sales p
CROSS JOIN LATERAL (
VALUES
('North', p.north_revenue),
('South', p.south_revenue),
('East', p.east_revenue),
('West', p.west_revenue)
) AS r(region, revenue)
WHERE r.revenue > 0
ORDER BY p.year, r.region;
Dynamic UNPIVOT
-- Dynamic unpivot using hstore
CREATE EXTENSION IF NOT EXISTS hstore;
SELECT
year,
(each(hstore(p) - ARRAY['year']::text[])).key AS region,
(each(hstore(p) - ARRAY['year']::text[])).value::decimal AS revenue
FROM pivoted_sales p
ORDER BY year, region;
-- Alternative: Using jsonb
SELECT
year,
key AS region,
value::decimal AS revenue
FROM pivoted_sales,
LATERAL jsonb_each_text(to_jsonb(pivoted_sales) - 'year') AS j(key, value)
WHERE value::decimal > 0
ORDER BY year, key;
Matrix Query Pattern
-- Create student grades table
CREATE TABLE student_grades (
student_id INT,
subject VARCHAR(50),
grade DECIMAL(5,2)
);
INSERT INTO student_grades VALUES
(1, 'Math', 95.5),
(1, 'Science', 88.0),
(1, 'English', 92.0),
(2, 'Math', 85.0),
(2, 'Science', 90.5),
(2, 'English', 88.5),
(3, 'Math', 92.0),
(3, 'Science', 95.0),
(3, 'English', 91.0);
-- Matrix: students as rows, subjects as columns
SELECT
s.student_name,
MAX(CASE WHEN sg.subject = 'Math' THEN sg.grade END) AS math,
MAX(CASE WHEN sg.subject = 'Science' THEN sg.grade END) AS science,
MAX(CASE WHEN sg.subject = 'English' THEN sg.grade END) AS english,
ROUND(AVG(sg.grade), 2) AS average
FROM students s
JOIN student_grades sg ON s.student_id = sg.student_id
GROUP BY s.student_id, s.student_name
ORDER BY s.student_name;
Output:
| student_name | math | science | english | average |
|---|---|---|---|---|
| Alice | 95.50 | 88.00 | 92.00 | 91.83 |
| Bob | 85.00 | 90.50 | 88.50 | 88.00 |
| Charlie | 92.00 | 95.00 | 91.00 | 92.67 |
Pivot with Multiple Aggregations
-- Pivot with SUM, AVG, COUNT
SELECT
year,
month,
SUM(CASE WHEN region = 'North' THEN revenue ELSE 0 END) AS north_total,
ROUND(AVG(CASE WHEN region = 'North' THEN revenue END), 2) AS north_avg,
COUNT(CASE WHEN region = 'North' THEN 1 END) AS north_count,
SUM(CASE WHEN region = 'South' THEN revenue ELSE 0 END) AS south_total,
ROUND(AVG(CASE WHEN region = 'South' THEN revenue END), 2) AS south_avg,
COUNT(CASE WHEN region = 'South' THEN 1 END) AS south_count
FROM monthly_sales
GROUP BY year, month
ORDER BY year, month;
Mathematical Properties
Pivot transformation:
Unpivot transformation:
Matrix representation:
ℹ️
Performance Tip: Pivot operations require full table scans. For large datasets, pre-aggregate data before pivoting.
Pivot with Window Functions
-- Pivot with ranking
WITH pivoted AS (
SELECT
year,
month,
SUM(CASE WHEN region = 'North' THEN revenue ELSE 0 END) AS north,
SUM(CASE WHEN region = 'South' THEN revenue ELSE 0 END) AS south
FROM monthly_sales
GROUP BY year, month
)
SELECT
year,
month,
north,
south,
RANK() OVER (ORDER BY north DESC) AS north_rank,
RANK() OVER (ORDER BY south DESC) AS south_ratio,
north::decimal / NULLIF(south, 0) AS north_south_ratio
FROM pivoted
ORDER BY year, month;
Cross-Tabulation Analysis
-- Contingency table for chi-square test
WITH contingency AS (
SELECT
region,
product,
COUNT(*) AS frequency
FROM monthly_sales
GROUP BY region, product
)
SELECT
region,
SUM(CASE WHEN product = 'Laptop' THEN frequency ELSE 0 END) AS laptop,
SUM(CASE WHEN product = 'Phone' THEN frequency ELSE 0 END) AS phone,
SUM(frequency) AS total
FROM contingency
GROUP BY region
ORDER BY region;
-- Expected frequencies for chi-square
WITH observed AS (
SELECT
region,
product,
COUNT(*) AS observed
FROM monthly_sales
GROUP BY region, product
),
totals AS (
SELECT
SUM(observed) AS grand_total,
SUM(CASE WHEN product = 'Laptop' THEN observed ELSE 0 END) AS laptop_total,
SUM(CASE WHEN product = 'Phone' THEN observed ELSE 0 END) AS phone_total,
SUM(CASE WHEN region = 'North' THEN observed ELSE 0 END) AS north_total,
SUM(CASE WHEN region = 'South' THEN observed ELSE 0 END) AS south_total
FROM observed
)
SELECT
o.region,
o.product,
o.observed,
ROUND(
CASE
WHEN o.region = 'North' AND o.product = 'Laptop'
THEN t.north_total * t.laptop_total::decimal / t.grand_total
WHEN o.region = 'North' AND o.product = 'Phone'
THEN t.north_total * t.phone_total::decimal / t.grand_total
WHEN o.region = 'South' AND o.product = 'Laptop'
THEN t.south_total * t.laptop_total::decimal / t.grand_total
WHEN o.region = 'South' AND o.product = 'Phone'
THEN t.south_total * t.phone_total::decimal / t.grand_total
END, 2
) AS expected,
ROUND(
POWER(o.observed -
CASE
WHEN o.region = 'North' AND o.product = 'Laptop'
THEN t.north_total * t.laptop_total::decimal / t.grand_total
WHEN o.region = 'North' AND o.product = 'Phone'
THEN t.north_total * t.phone_total::decimal / t.grand_total
WHEN o.region = 'South' AND o.product = 'Laptop'
THEN t.south_total * t.laptop_total::decimal / t.grand_total
WHEN o.region = 'South' AND o.product = 'Phone'
THEN t.south_total * t.phone_total::decimal / t.grand_total
END, 2) /
CASE
WHEN o.region = 'North' AND o.product = 'Laptop'
THEN t.north_total * t.laptop_total::decimal / t.grand_total
WHEN o.region = 'North' AND o.product = 'Phone'
THEN t.north_total * t.phone_total::decimal / t.grand_total
WHEN o.region = 'South' AND o.product = 'Laptop'
THEN t.south_total * t.laptop_total::decimal / t.grand_total
WHEN o.region = 'South' AND o.product = 'Phone'
THEN t.south_total * t.phone_total::decimal / t.grand_total
END, 2
) AS chi_square_component
FROM observed o
CROSS JOIN totals t
ORDER BY o.region, o.product;
Performance Comparison
| Method | Time Complexity | Space | Dynamic | Database |
|---|---|---|---|---|
| CASE WHEN | No | All | ||
| crosstab | No | PostgreSQL | ||
| Dynamic SQL | Yes | All | ||
| LATERAL JOIN | Yes | PostgreSQL |
⚠️
Memory Warning: Pivoting large datasets can consume significant memory. Use work_mem setting and consider materializing intermediate results.