Advanced Partitioning: Range-List, Sub-Partition, Pruning

Interview Question: "Explain the difference between Range and List partitioning. How does partition pruning work? When would you use sub-partitioning?" — Asked at Oracle, Teradata, Snowflake for Data Architect roles

ℹ️

Difficulty: Advanced | Companies: Oracle, Teradata, Snowflake, Amazon Redshift, Google BigQuery | Time: 60-75 minutes

Partitioning Types

\text{Range} = [a, b) \quad \text{List} = \{v_1, v_2, ..., v_n\} \quad \text{Hash} = h(key) \mod n

-- Range partitioning by date
CREATE TABLE sales (
    sale_id SERIAL,
    sale_date DATE,
    region VARCHAR(20),
    product_id INT,
    amount DECIMAL(12,2)
) PARTITION BY RANGE (sale_date);

-- Create partitions
CREATE TABLE sales_2023 PARTITION OF sales
    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');

CREATE TABLE sales_2024_q1 PARTITION OF sales
    FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');

CREATE TABLE sales_2024_q2 PARTITION OF sales
    FOR VALUES FROM ('2024-04-01') TO ('2024-07-01');

CREATE TABLE sales_2024_q3 PARTITION OF sales
    FOR VALUES FROM ('2024-07-01') TO ('2024-10-01');

CREATE TABLE sales_2024_q4 PARTITION OF sales
    FOR VALUES FROM ('2024-10-01') TO ('2025-01-01');

-- List partitioning by region
CREATE TABLE sales_by_region (
    sale_id SERIAL,
    sale_date DATE,
    region VARCHAR(20),
    amount DECIMAL(12,2)
) PARTITION BY LIST (region);

CREATE TABLE sales_north PARTITION OF sales_by_region
    FOR VALUES IN ('North', 'Northeast', 'Northwest');

CREATE TABLE sales_south PARTITION OF sales_by_region
    FOR VALUES IN ('South', 'Southeast', 'Southwest');

CREATE TABLE sales_east PARTITION OF sales_by_region
    FOR VALUES IN ('East');

CREATE TABLE sales_west PARTITION OF sales_by_region
    FOR VALUES IN ('West');

Sub-Partitioning (Multi-Level)

-- Range-List sub-partitioning
CREATE TABLE orders (
    order_id SERIAL,
    order_date DATE,
    region VARCHAR(20),
    product_category VARCHAR(50),
    amount DECIMAL(12,2)
) PARTITION BY RANGE (order_date);

-- First level: by year
CREATE TABLE orders_2024 PARTITION OF orders
    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01')
    PARTITION BY LIST (region);

-- Second level: by region within year
CREATE TABLE orders_2024_north PARTITION OF orders_2024
    FOR VALUES IN ('North', 'Northeast', 'Northwest');

CREATE TABLE orders_2024_south PARTITION OF orders_2024
    FOR VALUES IN ('South', 'Southeast', 'Southwest');

-- Range-Hash sub-partitioning
CREATE TABLE events (
    event_id SERIAL,
    event_time TIMESTAMP,
    user_id INT,
    event_type VARCHAR(50)
) PARTITION BY RANGE (event_time);

CREATE TABLE events_2024 PARTITION OF events
    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01')
    PARTITION BY HASH (user_id);

CREATE TABLE events_2024_p0 PARTITION OF events_2024
    FOR VALUES WITH (MODULUS 4, REMAINDER 0);
CREATE TABLE events_2024_p1 PARTITION OF events_2024
    FOR VALUES WITH (MODULUS 4, REMAINDER 1);
CREATE TABLE events_2024_p2 PARTITION OF events_2024
    FOR VALUES WITH (MODULUS 4, REMAINDER 2);
CREATE TABLE events_2024_p3 PARTITION OF events_2024
    FOR VALUES WITH (MODULUS 4, REMAINDER 3);

Partition Pruning

-- Enable partition pruning debugging
SET enable_partition_pruning = on;

-- Check if pruning is working
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM sales 
WHERE sale_date BETWEEN '2024-06-01' AND '2024-06-30';

-- Should only scan sales_2024_q2 partition

-- Multi-partition pruning
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM sales 
WHERE sale_date >= '2024-01-01' AND sale_date < '2024-07-01';

-- Should scan sales_2024_q1 and sales_2024_q2

-- List partition pruning
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM sales_by_region 
WHERE region = 'North';

Pruning Output:

Architecture Diagram

Append  (cost=0.00..1234.56 rows=5000 width=48)
  ->  Seq Scan on sales_north sales  (cost=0.00..1234.56 rows=5000 width=48)
        Filter: (region = 'North')

Partition-Wise Operations

-- Partition-wise join
SET enable_partitionwise_join = on;
SET enable_partitionwise_aggregate = on;

-- Join on partition key
EXPLAIN (ANALYZE, BUFFERS)
SELECT s.*, r.region_name
FROM sales s
JOIN regions r ON s.region = r.region_name
WHERE s.sale_date BETWEEN '2024-01-01' AND '2024-12-31';

-- Partition-wise aggregation
EXPLAIN (ANALYZE, BUFFERS)
SELECT 
    date_trunc('month', sale_date) AS month,
    region,
    SUM(amount) AS total_sales
FROM sales
WHERE sale_date >= '2024-01-01'
GROUP BY date_trunc('month', sale_date), region;

Automated Partition Management

-- Create function to add new partitions
CREATE OR REPLACE FUNCTION create_monthly_partition()
RETURNS TRIGGER AS $$
DECLARE
    partition_name TEXT;
    start_date DATE;
    end_date DATE;
BEGIN
    start_date := DATE_TRUNC('month', NEW.sale_date);
    end_date := start_date + INTERVAL '1 month';
    partition_name := 'sales_' || TO_CHAR(start_date, 'YYYY_MM');
    
    EXECUTE format(
        'CREATE TABLE IF NOT EXISTS %I PARTITION OF sales FOR VALUES FROM (%L) TO (%L)',
        partition_name, start_date, end_date
    );
    
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Create trigger for automatic partition creation
CREATE TRIGGER auto_create_partition
BEFORE INSERT ON sales
FOR EACH ROW EXECUTE FUNCTION create_monthly_partition();

-- Partition maintenance script
DO $$
DECLARE
    partition_date DATE;
    partition_name TEXT;
BEGIN
    FOR i IN 0..11 LOOP
        partition_date := DATE_TRUNC('month', CURRENT_DATE) + (i || ' months')::INTERVAL;
        partition_name := 'sales_' || TO_CHAR(partition_date, 'YYYY_MM');
        
        IF NOT EXISTS (
            SELECT 1 FROM pg_tables WHERE tablename = partition_name
        ) THEN
            EXECUTE format(
                'CREATE TABLE %I PARTITION OF sales FOR VALUES FROM (%L) TO (%L)',
                partition_name,
                partition_date,
                partition_date + INTERVAL '1 month'
            );
            RAISE NOTICE 'Created partition: %', partition_name;
        END IF;
    END LOOP;
END $$;

Partition Statistics

-- View partition sizes
SELECT 
    schemaname,
    tablename,
    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size,
    pg_total_relation_size(schemaname||'.'||tablename) AS size_bytes
FROM pg_tables
WHERE tablename LIKE 'sales%'
ORDER BY size_bytes DESC;

-- Partition row counts
SELECT 
    relname AS partition_name,
    pg_class.reltuples::bigint AS row_estimate,
    pg_size_pretty(pg_relation_size(oid)) AS size
FROM pg_class
WHERE relname LIKE 'sales%'
    AND relkind = 'r'
ORDER BY relname;

-- Check partition pruning effectiveness
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT * FROM sales WHERE sale_date = '2024-06-15';

Advanced Partition Patterns

-- Rolling window partition management
DO $$
DECLARE
    keep_months INT := 12;
    drop_date DATE;
    partition_name TEXT;
BEGIN
    -- Drop old partitions
    FOR i IN 1..keep_months LOOP
        drop_date := DATE_TRUNC('month', CURRENT_DATE) - (i || ' months')::INTERVAL;
        partition_name := 'sales_' || TO_CHAR(drop_date, 'YYYY_MM');
        
        IF EXISTS (
            SELECT 1 FROM pg_tables WHERE tablename = partition_name
        ) THEN
            EXECUTE format('DROP TABLE IF EXISTS %I', partition_name);
            RAISE NOTICE 'Dropped partition: %', partition_name;
        END IF;
    END LOOP;
END $$;

-- Detach old partitions for archiving
ALTER TABLE sales DETACH PARTITION sales_2023;

-- Attach new partition
CREATE TABLE sales_2025_q1 PARTITION OF sales
    FOR VALUES FROM ('2025-01-01') TO ('2025-04-01');

Partition Key Selection

\text{Best Key} = \arg\max_{k} \text{pruning\_efficiency}(k, \text{queries})

Query Pattern	Recommended Key	Avoid
Time-series	Timestamp/date	Integer ID
Multi-tenant	Tenant ID	Random UUID
Regional	Region/country	Status
High cardinality	Hash of key	Low cardinality

ℹ️

Partition Pruning: Always include partition key in WHERE clause. PostgreSQL can prune partitions at planning time, not execution.

Partition Performance Metrics

Metric	Target	Warning
Partition Count	10-100	> 1000
Partition Size	100MB-10GB	< 1MB or > 100GB
Pruning Ratio	> 90%	< 50%
Cross-Partition Joins	0	> 10%

-- Monitor partition usage
SELECT 
    schemaname,
    relname,
    seq_scan,
    idx_scan,
    n_tup_ins,
    n_tup_upd,
    n_tup_del
FROM pg_stat_user_tables
WHERE relname LIKE 'sales%'
ORDER BY relname;

Mathematical Properties

For $n$ partitions of size $s$ :

\text{Pruning Efficiency} = \frac{\text{Partitions Scanned}}{n} \times 100\%

\text{Ideal Partition Size} = \frac{\text{Table Size}}{n} \approx \text{OS Cache Size}

⚠️

Over-Partitioning: Too many partitions slow down planning and metadata operations. Aim for 100-1000 partitions maximum.