Interview Question: "Explain the difference between Range and List partitioning. How does partition pruning work? When would you use sub-partitioning?" — Asked at Oracle, Teradata, Snowflake for Data Architect roles
ℹ️
Difficulty: Advanced | Companies: Oracle, Teradata, Snowflake, Amazon Redshift, Google BigQuery | Time: 60-75 minutes
Partitioning Types
-- Range partitioning by date
CREATE TABLE sales (
sale_id SERIAL,
sale_date DATE,
region VARCHAR(20),
product_id INT,
amount DECIMAL(12,2)
) PARTITION BY RANGE (sale_date);
-- Create partitions
CREATE TABLE sales_2023 PARTITION OF sales
FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');
CREATE TABLE sales_2024_q1 PARTITION OF sales
FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');
CREATE TABLE sales_2024_q2 PARTITION OF sales
FOR VALUES FROM ('2024-04-01') TO ('2024-07-01');
CREATE TABLE sales_2024_q3 PARTITION OF sales
FOR VALUES FROM ('2024-07-01') TO ('2024-10-01');
CREATE TABLE sales_2024_q4 PARTITION OF sales
FOR VALUES FROM ('2024-10-01') TO ('2025-01-01');
-- List partitioning by region
CREATE TABLE sales_by_region (
sale_id SERIAL,
sale_date DATE,
region VARCHAR(20),
amount DECIMAL(12,2)
) PARTITION BY LIST (region);
CREATE TABLE sales_north PARTITION OF sales_by_region
FOR VALUES IN ('North', 'Northeast', 'Northwest');
CREATE TABLE sales_south PARTITION OF sales_by_region
FOR VALUES IN ('South', 'Southeast', 'Southwest');
CREATE TABLE sales_east PARTITION OF sales_by_region
FOR VALUES IN ('East');
CREATE TABLE sales_west PARTITION OF sales_by_region
FOR VALUES IN ('West');
Sub-Partitioning (Multi-Level)
-- Range-List sub-partitioning
CREATE TABLE orders (
order_id SERIAL,
order_date DATE,
region VARCHAR(20),
product_category VARCHAR(50),
amount DECIMAL(12,2)
) PARTITION BY RANGE (order_date);
-- First level: by year
CREATE TABLE orders_2024 PARTITION OF orders
FOR VALUES FROM ('2024-01-01') TO ('2025-01-01')
PARTITION BY LIST (region);
-- Second level: by region within year
CREATE TABLE orders_2024_north PARTITION OF orders_2024
FOR VALUES IN ('North', 'Northeast', 'Northwest');
CREATE TABLE orders_2024_south PARTITION OF orders_2024
FOR VALUES IN ('South', 'Southeast', 'Southwest');
-- Range-Hash sub-partitioning
CREATE TABLE events (
event_id SERIAL,
event_time TIMESTAMP,
user_id INT,
event_type VARCHAR(50)
) PARTITION BY RANGE (event_time);
CREATE TABLE events_2024 PARTITION OF events
FOR VALUES FROM ('2024-01-01') TO ('2025-01-01')
PARTITION BY HASH (user_id);
CREATE TABLE events_2024_p0 PARTITION OF events_2024
FOR VALUES WITH (MODULUS 4, REMAINDER 0);
CREATE TABLE events_2024_p1 PARTITION OF events_2024
FOR VALUES WITH (MODULUS 4, REMAINDER 1);
CREATE TABLE events_2024_p2 PARTITION OF events_2024
FOR VALUES WITH (MODULUS 4, REMAINDER 2);
CREATE TABLE events_2024_p3 PARTITION OF events_2024
FOR VALUES WITH (MODULUS 4, REMAINDER 3);
Partition Pruning
-- Enable partition pruning debugging
SET enable_partition_pruning = on;
-- Check if pruning is working
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM sales
WHERE sale_date BETWEEN '2024-06-01' AND '2024-06-30';
-- Should only scan sales_2024_q2 partition
-- Multi-partition pruning
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM sales
WHERE sale_date >= '2024-01-01' AND sale_date < '2024-07-01';
-- Should scan sales_2024_q1 and sales_2024_q2
-- List partition pruning
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM sales_by_region
WHERE region = 'North';
Pruning Output:
Architecture Diagram
Append (cost=0.00..1234.56 rows=5000 width=48)
-> Seq Scan on sales_north sales (cost=0.00..1234.56 rows=5000 width=48)
Filter: (region = 'North')
Partition-Wise Operations
-- Partition-wise join
SET enable_partitionwise_join = on;
SET enable_partitionwise_aggregate = on;
-- Join on partition key
EXPLAIN (ANALYZE, BUFFERS)
SELECT s.*, r.region_name
FROM sales s
JOIN regions r ON s.region = r.region_name
WHERE s.sale_date BETWEEN '2024-01-01' AND '2024-12-31';
-- Partition-wise aggregation
EXPLAIN (ANALYZE, BUFFERS)
SELECT
date_trunc('month', sale_date) AS month,
region,
SUM(amount) AS total_sales
FROM sales
WHERE sale_date >= '2024-01-01'
GROUP BY date_trunc('month', sale_date), region;
Automated Partition Management
-- Create function to add new partitions
CREATE OR REPLACE FUNCTION create_monthly_partition()
RETURNS TRIGGER AS $$
DECLARE
partition_name TEXT;
start_date DATE;
end_date DATE;
BEGIN
start_date := DATE_TRUNC('month', NEW.sale_date);
end_date := start_date + INTERVAL '1 month';
partition_name := 'sales_' || TO_CHAR(start_date, 'YYYY_MM');
EXECUTE format(
'CREATE TABLE IF NOT EXISTS %I PARTITION OF sales FOR VALUES FROM (%L) TO (%L)',
partition_name, start_date, end_date
);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create trigger for automatic partition creation
CREATE TRIGGER auto_create_partition
BEFORE INSERT ON sales
FOR EACH ROW EXECUTE FUNCTION create_monthly_partition();
-- Partition maintenance script
DO $$
DECLARE
partition_date DATE;
partition_name TEXT;
BEGIN
FOR i IN 0..11 LOOP
partition_date := DATE_TRUNC('month', CURRENT_DATE) + (i || ' months')::INTERVAL;
partition_name := 'sales_' || TO_CHAR(partition_date, 'YYYY_MM');
IF NOT EXISTS (
SELECT 1 FROM pg_tables WHERE tablename = partition_name
) THEN
EXECUTE format(
'CREATE TABLE %I PARTITION OF sales FOR VALUES FROM (%L) TO (%L)',
partition_name,
partition_date,
partition_date + INTERVAL '1 month'
);
RAISE NOTICE 'Created partition: %', partition_name;
END IF;
END LOOP;
END $$;
Partition Statistics
-- View partition sizes
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size,
pg_total_relation_size(schemaname||'.'||tablename) AS size_bytes
FROM pg_tables
WHERE tablename LIKE 'sales%'
ORDER BY size_bytes DESC;
-- Partition row counts
SELECT
relname AS partition_name,
pg_class.reltuples::bigint AS row_estimate,
pg_size_pretty(pg_relation_size(oid)) AS size
FROM pg_class
WHERE relname LIKE 'sales%'
AND relkind = 'r'
ORDER BY relname;
-- Check partition pruning effectiveness
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT * FROM sales WHERE sale_date = '2024-06-15';
Advanced Partition Patterns
-- Rolling window partition management
DO $$
DECLARE
keep_months INT := 12;
drop_date DATE;
partition_name TEXT;
BEGIN
-- Drop old partitions
FOR i IN 1..keep_months LOOP
drop_date := DATE_TRUNC('month', CURRENT_DATE) - (i || ' months')::INTERVAL;
partition_name := 'sales_' || TO_CHAR(drop_date, 'YYYY_MM');
IF EXISTS (
SELECT 1 FROM pg_tables WHERE tablename = partition_name
) THEN
EXECUTE format('DROP TABLE IF EXISTS %I', partition_name);
RAISE NOTICE 'Dropped partition: %', partition_name;
END IF;
END LOOP;
END $$;
-- Detach old partitions for archiving
ALTER TABLE sales DETACH PARTITION sales_2023;
-- Attach new partition
CREATE TABLE sales_2025_q1 PARTITION OF sales
FOR VALUES FROM ('2025-01-01') TO ('2025-04-01');
Partition Key Selection
| Query Pattern | Recommended Key | Avoid |
|---|---|---|
| Time-series | Timestamp/date | Integer ID |
| Multi-tenant | Tenant ID | Random UUID |
| Regional | Region/country | Status |
| High cardinality | Hash of key | Low cardinality |
ℹ️
Partition Pruning: Always include partition key in WHERE clause. PostgreSQL can prune partitions at planning time, not execution.
Partition Performance Metrics
| Metric | Target | Warning |
|---|---|---|
| Partition Count | 10-100 | > 1000 |
| Partition Size | 100MB-10GB | < 1MB or > 100GB |
| Pruning Ratio | > 90% | < 50% |
| Cross-Partition Joins | 0 | > 10% |
-- Monitor partition usage
SELECT
schemaname,
relname,
seq_scan,
idx_scan,
n_tup_ins,
n_tup_upd,
n_tup_del
FROM pg_stat_user_tables
WHERE relname LIKE 'sales%'
ORDER BY relname;
Mathematical Properties
For partitions of size :
⚠️
Over-Partitioning: Too many partitions slow down planning and metadata operations. Aim for 100-1000 partitions maximum.