Dynamic PIVOT/UNPIVOT: Crosstab Reports, Matrix Queries

Interview Question: "How do you create a dynamic pivot table when the number of columns is unknown at compile time? Explain the performance implications of pivoting large datasets." — Asked at Tableau, Looker, PowerBI for Analytics Engineer roles

ℹ️

Difficulty: Advanced | Companies: Tableau, Looker, PowerBI, Qlik, Sisense | Time: 45-60 minutes

Static PIVOT with CASE

-- Create sales data
CREATE TABLE monthly_sales (
    year INT,
    month INT,
    region VARCHAR(20),
    product VARCHAR(50),
    revenue DECIMAL(12,2)
);

INSERT INTO monthly_sales VALUES
(2024, 1, 'North', 'Laptop', 150000),
(2024, 1, 'North', 'Phone', 200000),
(2024, 1, 'South', 'Laptop', 120000),
(2024, 1, 'South', 'Phone', 180000),
(2024, 2, 'North', 'Laptop', 160000),
(2024, 2, 'North', 'Phone', 210000),
(2024, 2, 'South', 'Laptop', 130000),
(2024, 2, 'South', 'Phone', 190000),
(2024, 3, 'North', 'Laptop', 170000),
(2024, 3, 'North', 'Phone', 220000),
(2024, 3, 'South', 'Laptop', 140000),
(2024, 3, 'South', 'Phone', 200000);

-- Static pivot: rows to columns
SELECT 
    year,
    month,
    SUM(CASE WHEN region = 'North' THEN revenue ELSE 0 END) AS north_revenue,
    SUM(CASE WHEN region = 'South' THEN revenue ELSE 0 END) AS south_revenue,
    SUM(CASE WHEN region = 'East' THEN revenue ELSE 0 END) AS east_revenue,
    SUM(CASE WHEN region = 'West' THEN revenue ELSE 0 END) AS west_revenue,
    SUM(revenue) AS total_revenue
FROM monthly_sales
GROUP BY year, month
ORDER BY year, month;

Output:

year	month	north_revenue	south_revenue	total_revenue
2024	1	350000	300000	650000
2024	2	370000	320000	690000
2024	3	390000	340000	730000

crosstab Function (tablefunc)

-- Enable tablefunc extension
CREATE EXTENSION IF NOT EXISTS tablefunc;

-- Basic crosstab
SELECT * FROM crosstab(
    'SELECT year, month, SUM(revenue)::INT 
     FROM monthly_sales 
     GROUP BY year, month 
     ORDER BY year, month',
    'SELECT DISTINCT month FROM monthly_sales ORDER BY month'
) AS ct(year INT, jan INT, feb INT, mar INT);

-- crosstab with multiple value columns
SELECT * FROM crosstab(
    'SELECT year, region, SUM(revenue)::INT 
     FROM monthly_sales 
     GROUP BY year, region 
     ORDER BY year, region',
    'SELECT DISTINCT region FROM monthly_sales ORDER BY region'
) AS ct(year INT, north INT, south INT);

Output:

year	jan	feb	mar
2024	650000	690000	730000

Dynamic PIVOT with EXECUTE

-- Dynamic pivot for unknown columns
DO $$
DECLARE
    sql TEXT;
    col_list TEXT;
    regions TEXT[];
BEGIN
    -- Get distinct regions
    SELECT ARRAY_AGG(DISTINCT region ORDER BY region) INTO regions
    FROM monthly_sales;
    
    -- Build column list
    SELECT STRING_AGG(
        format('SUM(CASE WHEN region = %L THEN revenue ELSE 0 END) AS %I', 
               region, LOWER(region)),
        ', '
    ) INTO col_list
    FROM unnest(regions) AS region;
    
    -- Build full query
    sql := format('
        SELECT 
            year,
            month,
            %s,
            SUM(revenue) AS total_revenue
        FROM monthly_sales
        GROUP BY year, month
        ORDER BY year, month',
        col_list
    );
    
    -- Execute and return results
    RAISE NOTICE '%', sql;
    EXECUTE sql;
END $$;

UNPIVOT with UNION ALL

-- Create pivoted table
CREATE TABLE pivoted_sales (
    year INT,
    north_revenue DECIMAL(12,2),
    south_revenue DECIMAL(12,2),
    east_revenue DECIMAL(12,2),
    west_revenue DECIMAL(12,2)
);

INSERT INTO pivoted_sales VALUES
(2024, 350000, 300000, 0, 0),
(2024, 370000, 320000, 0, 0),
(2024, 390000, 340000, 0, 0);

-- Unpivot with UNION ALL
SELECT year, 'North' AS region, north_revenue AS revenue
FROM pivoted_sales
UNION ALL
SELECT year, 'South' AS region, south_revenue AS revenue
FROM pivoted_sales
UNION ALL
SELECT year, 'East' AS region, east_revenue AS revenue
FROM pivoted_sales
UNION ALL
SELECT year, 'West' AS region, west_revenue AS revenue
FROM pivoted_sales
ORDER BY year, region;

-- Unpivot with LATERAL JOIN (more efficient)
SELECT 
    p.year,
    r.region,
    r.revenue
FROM pivoted_sales p
CROSS JOIN LATERAL (
    VALUES 
        ('North', p.north_revenue),
        ('South', p.south_revenue),
        ('East', p.east_revenue),
        ('West', p.west_revenue)
) AS r(region, revenue)
WHERE r.revenue > 0
ORDER BY p.year, r.region;

Dynamic UNPIVOT

-- Dynamic unpivot using hstore
CREATE EXTENSION IF NOT EXISTS hstore;

SELECT 
    year,
    (each(hstore(p) - ARRAY['year']::text[])).key AS region,
    (each(hstore(p) - ARRAY['year']::text[])).value::decimal AS revenue
FROM pivoted_sales p
ORDER BY year, region;

-- Alternative: Using jsonb
SELECT 
    year,
    key AS region,
    value::decimal AS revenue
FROM pivoted_sales,
LATERAL jsonb_each_text(to_jsonb(pivoted_sales) - 'year') AS j(key, value)
WHERE value::decimal > 0
ORDER BY year, key;

Matrix Query Pattern

-- Create student grades table
CREATE TABLE student_grades (
    student_id INT,
    subject VARCHAR(50),
    grade DECIMAL(5,2)
);

INSERT INTO student_grades VALUES
(1, 'Math', 95.5),
(1, 'Science', 88.0),
(1, 'English', 92.0),
(2, 'Math', 85.0),
(2, 'Science', 90.5),
(2, 'English', 88.5),
(3, 'Math', 92.0),
(3, 'Science', 95.0),
(3, 'English', 91.0);

-- Matrix: students as rows, subjects as columns
SELECT 
    s.student_name,
    MAX(CASE WHEN sg.subject = 'Math' THEN sg.grade END) AS math,
    MAX(CASE WHEN sg.subject = 'Science' THEN sg.grade END) AS science,
    MAX(CASE WHEN sg.subject = 'English' THEN sg.grade END) AS english,
    ROUND(AVG(sg.grade), 2) AS average
FROM students s
JOIN student_grades sg ON s.student_id = sg.student_id
GROUP BY s.student_id, s.student_name
ORDER BY s.student_name;

Output:

student_name	math	science	english	average
Alice	95.50	88.00	92.00	91.83
Bob	85.00	90.50	88.50	88.00
Charlie	92.00	95.00	91.00	92.67

Pivot with Multiple Aggregations

-- Pivot with SUM, AVG, COUNT
SELECT 
    year,
    month,
    SUM(CASE WHEN region = 'North' THEN revenue ELSE 0 END) AS north_total,
    ROUND(AVG(CASE WHEN region = 'North' THEN revenue END), 2) AS north_avg,
    COUNT(CASE WHEN region = 'North' THEN 1 END) AS north_count,
    SUM(CASE WHEN region = 'South' THEN revenue ELSE 0 END) AS south_total,
    ROUND(AVG(CASE WHEN region = 'South' THEN revenue END), 2) AS south_avg,
    COUNT(CASE WHEN region = 'South' THEN 1 END) AS south_count
FROM monthly_sales
GROUP BY year, month
ORDER BY year, month;

Mathematical Properties

Pivot transformation:

\text{Pivot}: R(A, B, C) \rightarrow R'(A, C_1, C_2, ..., C_n) \quad \text{where } C_i = \sum_{b \in B_i} C

Unpivot transformation:

\text{Unpivot}: R'(A, C_1, C_2, ..., C_n) \rightarrow R(A, B, C) \quad \text{where } B = \{b_1, b_2, ..., b_n\}

Matrix representation:

M_{ij} = \text{value}(row_i, col_j)

ℹ️

Performance Tip: Pivot operations require full table scans. For large datasets, pre-aggregate data before pivoting.

Pivot with Window Functions

-- Pivot with ranking
WITH pivoted AS (
    SELECT 
        year,
        month,
        SUM(CASE WHEN region = 'North' THEN revenue ELSE 0 END) AS north,
        SUM(CASE WHEN region = 'South' THEN revenue ELSE 0 END) AS south
    FROM monthly_sales
    GROUP BY year, month
)
SELECT 
    year,
    month,
    north,
    south,
    RANK() OVER (ORDER BY north DESC) AS north_rank,
    RANK() OVER (ORDER BY south DESC) AS south_ratio,
    north::decimal / NULLIF(south, 0) AS north_south_ratio
FROM pivoted
ORDER BY year, month;

Cross-Tabulation Analysis

-- Contingency table for chi-square test
WITH contingency AS (
    SELECT 
        region,
        product,
        COUNT(*) AS frequency
    FROM monthly_sales
    GROUP BY region, product
)
SELECT 
    region,
    SUM(CASE WHEN product = 'Laptop' THEN frequency ELSE 0 END) AS laptop,
    SUM(CASE WHEN product = 'Phone' THEN frequency ELSE 0 END) AS phone,
    SUM(frequency) AS total
FROM contingency
GROUP BY region
ORDER BY region;

-- Expected frequencies for chi-square
WITH observed AS (
    SELECT 
        region,
        product,
        COUNT(*) AS observed
    FROM monthly_sales
    GROUP BY region, product
),
totals AS (
    SELECT 
        SUM(observed) AS grand_total,
        SUM(CASE WHEN product = 'Laptop' THEN observed ELSE 0 END) AS laptop_total,
        SUM(CASE WHEN product = 'Phone' THEN observed ELSE 0 END) AS phone_total,
        SUM(CASE WHEN region = 'North' THEN observed ELSE 0 END) AS north_total,
        SUM(CASE WHEN region = 'South' THEN observed ELSE 0 END) AS south_total
    FROM observed
)
SELECT 
    o.region,
    o.product,
    o.observed,
    ROUND(
        CASE 
            WHEN o.region = 'North' AND o.product = 'Laptop' 
                THEN t.north_total * t.laptop_total::decimal / t.grand_total
            WHEN o.region = 'North' AND o.product = 'Phone' 
                THEN t.north_total * t.phone_total::decimal / t.grand_total
            WHEN o.region = 'South' AND o.product = 'Laptop' 
                THEN t.south_total * t.laptop_total::decimal / t.grand_total
            WHEN o.region = 'South' AND o.product = 'Phone' 
                THEN t.south_total * t.phone_total::decimal / t.grand_total
        END, 2
    ) AS expected,
    ROUND(
        POWER(o.observed - 
            CASE 
                WHEN o.region = 'North' AND o.product = 'Laptop' 
                    THEN t.north_total * t.laptop_total::decimal / t.grand_total
                WHEN o.region = 'North' AND o.product = 'Phone' 
                    THEN t.north_total * t.phone_total::decimal / t.grand_total
                WHEN o.region = 'South' AND o.product = 'Laptop' 
                    THEN t.south_total * t.laptop_total::decimal / t.grand_total
                WHEN o.region = 'South' AND o.product = 'Phone' 
                    THEN t.south_total * t.phone_total::decimal / t.grand_total
            END, 2) / 
            CASE 
                WHEN o.region = 'North' AND o.product = 'Laptop' 
                    THEN t.north_total * t.laptop_total::decimal / t.grand_total
                WHEN o.region = 'North' AND o.product = 'Phone' 
                    THEN t.north_total * t.phone_total::decimal / t.grand_total
                WHEN o.region = 'South' AND o.product = 'Laptop' 
                    THEN t.south_total * t.laptop_total::decimal / t.grand_total
                WHEN o.region = 'South' AND o.product = 'Phone' 
                    THEN t.south_total * t.phone_total::decimal / t.grand_total
            END, 2
    ) AS chi_square_component
FROM observed o
CROSS JOIN totals t
ORDER BY o.region, o.product;

Performance Comparison

Method	Time Complexity	Space	Dynamic	Database
CASE WHEN	$O(n \cdot k)$	$O(n)$	No	All
crosstab	$O(n \log n)$	$O(n)$	No	PostgreSQL
Dynamic SQL	$O(n \log n)$	$O(n)$	Yes	All
LATERAL JOIN	$O(n \cdot k)$	$O(n)$	Yes	PostgreSQL

⚠️

Memory Warning: Pivoting large datasets can consume significant memory. Use work_mem setting and consider materializing intermediate results.