πŸŽ‰ 75% of content is free forever β€” Unlock Premium from $10/mo β†’
CW
Search courses…
πŸ’Ό Servicesℹ️ Aboutβœ‰οΈ ContactView Pricing Plansfrom $10

Materialized Views & Caching

Advanced SQLPerformance⭐ Premium

Advertisement

Materialized Views & Caching

Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber

Materialized View Fundamentals

-- Create materialized view
CREATE MATERIALIZED VIEW mv_monthly_sales AS
SELECT
  DATE_TRUNC('month', order_date)::DATE AS month,
  region,
  product_category,
  SUM(amount) AS total_sales,
  COUNT(*) AS order_count,
  COUNT(DISTINCT customer_id) AS unique_customers
FROM orders
INNER JOIN products ON orders.product_id = products.product_id
GROUP BY 1, 2, 3;

-- Query the materialized view (fast!)
SELECT * FROM mv_monthly_sales
WHERE month = '2024-01-01'
  AND region = 'US';

ℹ️

Key Insight: Materialized views store the result of a query physically. They're excellent for expensive aggregations that are queried frequently. Unlike regular views, they don't update automatically.

Refresh Strategies

-- Full refresh (blocking)
REFRESH MATERIALIZED VIEW mv_monthly_sales;

-- Concurrent refresh (non-blocking, requires unique index)
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_monthly_sales;

-- Refresh with data until refresh completes
CREATE MATERIALIZED VIEW mv_sales WITH DATA AS
SELECT ...;

-- Create without data, populate later
CREATE MATERIALIZED VIEW mv_sales WITHOUT DATA AS
SELECT ...;
REFRESH MATERIALIZED VIEW mv_sales;

Concurrent Refresh with Unique Index

-- Create unique index for concurrent refresh
CREATE UNIQUE INDEX idx_mv_monthly_sales
ON mv_monthly_sales (month, region, product_category);

-- Now concurrent refresh is possible
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_monthly_sales;

⚠️

Important: Concurrent refresh requires a UNIQUE index. Without it, you must use non-concurrent refresh which locks the view.

BigQuery Materialized Views

-- BigQuery materialized view
CREATE MATERIALIZED VIEW `project.dataset.mv_daily_sales`
OPTIONS (
  enable_refresh = true,
  refresh_interval_minutes = 60
)
AS
SELECT
  DATE(order_date) AS sale_date,
  product_category,
  SUM(amount) AS total_sales,
  COUNT(*) AS order_count
FROM `project.dataset.orders`
GROUP BY 1, 2;

-- Check refresh status
SELECT * FROM `project.dataset.INFORMATION_SCHEMA.MATERIALIZED_VIEWS`;

Incremental Refresh Pattern

-- Track changes for incremental refresh
CREATE TABLE mv_refresh_log (
  mv_name VARCHAR(100),
  last_refresh TIMESTAMP,
  PRIMARY KEY (mv_name)
);

-- Incremental refresh procedure
CREATE OR REPLACE PROCEDURE refresh_mv_incremental(mv_name TEXT)
LANGUAGE plpgsql
AS $$
DECLARE
  last_refresh_time TIMESTAMP;
BEGIN
  -- Get last refresh time
  SELECT last_refresh INTO last_refresh_time
  FROM mv_refresh_log
  WHERE mv_name = $1;

  -- Refresh materialized view
  REFRESH MATERIALIZED VIEW CONCURRENTLY mv_monthly_sales;

  -- Update refresh log
  INSERT INTO mv_refresh_log (mv_name, last_refresh)
  VALUES ($1, CURRENT_TIMESTAMP)
  ON CONFLICT (mv_name)
  DO UPDATE SET last_refresh = CURRENT_TIMESTAMP;
END;
$$;

Indexing Materialized Views

-- Create indexes on materialized views
CREATE INDEX idx_mv_sales_month ON mv_monthly_sales (month);
CREATE INDEX idx_mv_sales_region ON mv_monthly_sales (region, month);
CREATE INDEX idx_mv_sales_category ON mv_monthly_sales (product_category);

-- Check index sizes
SELECT
  indexname,
  pg_size_pretty(pg_relation_size(indexname::regclass)) AS size
FROM pg_indexes
WHERE tablename = 'mv_monthly_sales';

Query Caching Patterns

-- Cache expensive computations
CREATE MATERIALIZED VIEW mv_user_stats AS
SELECT
  user_id,
  COUNT(*) AS total_orders,
  SUM(amount) AS total_spent,
  AVG(amount) AS avg_order_value,
  MIN(order_date) AS first_order,
  MAX(order_date) AS last_order,
  AVG(EXTRACT(DAY FROM order_date - LAG(order_date)
      OVER (ORDER BY order_date))) AS avg_days_between_orders
FROM orders
GROUP BY user_id;

-- Refresh schedule (e.g., daily)
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_user_stats;

Partitioned Materialized Views

-- Partition materialized view by date
CREATE MATERIALIZED VIEW mv_orders_daily
PARTITION BY RANGE (sale_date) AS
SELECT
  DATE(order_date) AS sale_date,
  product_id,
  SUM(amount) AS daily_sales
FROM orders
GROUP BY 1, 2;

-- Create partitions
CREATE TABLE mv_orders_daily_2024_q1
  PARTITION OF mv_orders_daily
  FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');

Conditional Materialization

-- Create materialized view with filter
CREATE MATERIALIZED VIEW mv_high_value_orders AS
SELECT
  order_id,
  customer_id,
  total_amount,
  order_date
FROM orders
WHERE total_amount > 10000  -- Only high-value orders
  AND order_date > CURRENT_DATE - INTERVAL '1 year';

-- Index for fast lookups
CREATE INDEX idx_mv_hvo_customer ON mv_high_value_orders (customer_id);

Materialized View Refresh Monitoring

-- Monitor refresh status
SELECT
  schemaname,
  matviewname,
  pg_size_pretty(pg_total_relation_size(schemaname||'.'||matviewname)) AS size,
  last_refresh
FROM pg_stat_user_matviews
ORDER BY pg_total_relation_size(schemaname||'.'||matviewname) DESC;

-- Check refresh frequency
SELECT
  mv_name,
  last_refresh,
  CURRENT_TIMESTAMP - last_refresh AS time_since_refresh
FROM mv_refresh_log;

Auto-Refresh with pg_cron

-- Schedule automatic refresh using pg_cron
CREATE EXTENSION IF NOT EXISTS pg_cron;

-- Refresh daily at 2 AM
SELECT cron.schedule(
  'refresh-mv-monthly',
  '0 2 * * *',
  'REFRESH MATERIALIZED VIEW CONCURRENTLY mv_monthly_sales'
);

-- Check scheduled jobs
SELECT * FROM cron.job;

Data Warehouse Optimization

-- Star schema materialized views
CREATE MATERIALIZED VIEW mv_fact_sales_summary AS
SELECT
  d.date_key,
  p.product_key,
  c.customer_key,
  s.store_key,
  SUM(f.sales_amount) AS total_sales,
  SUM(f.quantity) AS total_quantity,
  COUNT(*) AS transaction_count
FROM fact_sales f
INNER JOIN dim_date d ON f.date_id = d.date_id
INNER JOIN dim_product p ON f.product_id = p.product_id
INNER JOIN dim_customer c ON f.customer_id = c.customer_id
INNER JOIN dim_store s ON f.store_id = s.store_id
GROUP BY 1, 2, 3, 4;

-- Create composite index
CREATE INDEX idx_mv_fss_composite
ON mv_fact_sales_summary (date_key, product_key, customer_key, store_key);

Nested Materialized Views

-- Create base materialized view
CREATE MATERIALIZED VIEW mv_base_sales AS
SELECT
  order_date,
  product_id,
  SUM(amount) AS daily_sales
FROM orders
GROUP BY 1, 2;

-- Create derived materialized view
CREATE MATERIALIZED VIEW mv_weekly_sales AS
SELECT
  DATE_TRUNC('week', order_date)::DATE AS week,
  product_id,
  SUM(daily_sales) AS weekly_sales
FROM mv_base_sales
GROUP BY 1, 2;

-- Refresh base first, then derived
REFRESH MATERIALIZED VIEW mv_base_sales;
REFRESH MATERIALIZED VIEW mv_weekly_sales;

Follow-Up Questions

  1. When should you use materialized views vs regular views?
  2. How do you handle stale data in materialized views?
  3. What's the performance impact of concurrent vs non-concurrent refresh?
  4. How would you implement incremental refresh for large materialized views?
  5. Explain the trade-offs between refresh frequency and query performance.
  6. How do you manage materialized views in a data warehouse environment?

Advertisement