Partition Pruning & Elimination
Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber
Range Partitioning
-- PostgreSQL declarative partitioning
CREATE TABLE orders (
order_id BIGINT,
customer_id INT,
order_date DATE,
total_amount DECIMAL(10,2)
) PARTITION BY RANGE (order_date);
-- Create partitions
CREATE TABLE orders_2023_q1 PARTITION OF orders
FOR VALUES FROM ('2023-01-01') TO ('2023-04-01');
CREATE TABLE orders_2023_q2 PARTITION OF orders
FOR VALUES FROM ('2023-04-01') TO ('2023-07-01');
CREATE TABLE orders_2023_q3 PARTITION OF orders
FOR VALUES FROM ('2023-07-01') TO ('2023-10-01');
CREATE TABLE orders_2023_q4 PARTITION OF orders
FOR VALUES FROM ('2023-10-01') TO ('2024-01-01');
βΉοΈ
Key Insight: Partition pruning automatically eliminates partitions that don't match the query's WHERE clause. This dramatically reduces I/O by only scanning relevant partitions.
Partition Pruning Verification
-- Verify partition pruning is working
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM orders
WHERE order_date >= '2023-04-01'
AND order_date < '2023-07-01';
-- Check pruning statistics
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
FROM pg_tables
WHERE tablename LIKE 'orders_%'
ORDER BY tablename;
Hash Partitioning
-- Hash partitioning for even distribution
CREATE TABLE users (
user_id BIGINT,
username VARCHAR(100),
email VARCHAR(255)
) PARTITION BY HASH (user_id);
-- Create hash partitions
CREATE TABLE users_p0 PARTITION OF users
FOR VALUES WITH (MODULUS 4, REMAINDER 0);
CREATE TABLE users_p1 PARTITION OF users
FOR VALUES WITH (MODULUS 4, REMAINDER 1);
CREATE TABLE users_p2 PARTITION OF users
FOR VALUES WITH (MODULUS 4, REMAINDER 2);
CREATE TABLE users_p3 PARTITION OF users
FOR VALUES WITH (MODULUS 4, REMAINDER 3);
List Partitioning
-- List partitioning for categorical data
CREATE TABLE sales (
sale_id BIGINT,
region VARCHAR(50),
amount DECIMAL(10,2),
sale_date DATE
) PARTITION BY LIST (region);
CREATE TABLE sales_us PARTITION OF sales
FOR VALUES IN ('US_EAST', 'US_WEST', 'US_CENTRAL');
CREATE TABLE sales_eu PARTITION OF sales
FOR VALUES IN ('EU_WEST', 'EU_EAST', 'EU_NORTH');
CREATE TABLE sales_apac PARTITION OF sales
FOR VALUES IN ('APAC_EAST', 'APAC_WEST');
BigQuery Partitioning
-- BigQuery time-unit partitioning
CREATE TABLE `project.dataset.events` (
event_id STRING,
event_time TIMESTAMP,
user_id STRING,
event_type STRING
)
PARTITION BY DATE(event_time)
OPTIONS (
require_partition_filter = true
);
-- BigQuery ingestion-time partitioning
CREATE TABLE `project.dataset.logs` (
log_id STRING,
message STRING
)
PARTITION BY DATE(_PARTITIONTIME);
-- Query with partition filter
SELECT *
FROM `project.dataset.events`
WHERE event_time >= '2024-01-01'
AND event_time < '2024-02-01';
β οΈ
Important: BigQuery's require_partition_filter = true option forces queries to include a partition filter, preventing full-table scans and reducing costs.
Partition Pruning Patterns
-- Good: Pruning works
SELECT * FROM orders
WHERE order_date = '2023-06-15';
-- Good: Range pruning
SELECT * FROM orders
WHERE order_date BETWEEN '2023-04-01' AND '2023-06-30';
-- Bad: Pruning fails (function on partition column)
SELECT * FROM orders
WHERE EXTRACT(YEAR FROM order_date) = 2023;
-- Good: Rewrite for pruning
SELECT * FROM orders
WHERE order_date >= '2023-01-01'
AND order_date < '2024-01-01';
Composite Partitioning
-- Subpartitioning (partition of partition)
CREATE TABLE orders (
order_id BIGINT,
customer_id INT,
order_date DATE,
region VARCHAR(50)
) PARTITION BY RANGE (order_date);
-- First level: by date
CREATE TABLE orders_2023 PARTITION OF orders
FOR VALUES FROM ('2023-01-01') TO ('2024-01-01')
PARTITION BY LIST (region);
-- Second level: by region within year
CREATE TABLE orders_2023_us PARTITION OF orders_2023
FOR VALUES IN ('US_EAST', 'US_WEST');
Partition Maintenance
-- Create new partition in advance
CREATE TABLE orders_2024_q1 PARTITION OF orders
FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');
-- Detach old partition
ALTER TABLE orders DETACH PARTITION orders_2022_q1;
-- Drop old partition
DROP TABLE orders_2022_q1;
-- Attach existing table as partition
CREATE TABLE orders_old (
LIKE orders INCLUDING ALL
);
ALTER TABLE orders ATTACH PARTITION orders_old
FOR VALUES FROM ('2022-01-01') TO ('2023-01-01');
Default Partition
-- Default partition catches all unmatched rows
CREATE TABLE orders_default PARTITION OF orders DEFAULT;
-- This ensures no rows are lost
INSERT INTO orders (order_id, order_date)
VALUES (1, '2025-01-01'); -- Goes to default partition
Partition-wise JOIN
-- Enable partition-wise join
SET enable_partitionwise_join = on;
-- Query joins partitioned tables
SELECT
o.order_id,
c.customer_name
FROM orders o
INNER JOIN customers c ON o.customer_id = c.customer_id
WHERE o.order_date >= '2023-01-01'
AND o.order_date < '2024-01-01';
Partition Statistics
-- Analyze partition statistics
SELECT
schemaname,
tablename,
n_live_tup,
n_dead_tup,
last_vacuum,
last_autovacuum
FROM pg_stat_user_tables
WHERE tablename LIKE 'orders_%'
ORDER BY tablename;
-- Check partition sizes
SELECT
partition,
pg_size_pretty(pg_total_relation_size partition) AS size
FROM (
SELECT
schemaname||'.'||tablename AS partition
FROM pg_tables
WHERE tablename LIKE 'orders_%'
) sub;
Follow-Up Questions
- When would you choose hash partitioning over range partitioning?
- How does partition pruning interact with index usage?
- What's the impact of partitioning on INSERT performance?
- How do you handle cross-partition queries efficiently?
- Explain the concept of partition elimination in distributed databases.
- How do you manage partition lifecycle in a data warehouse?