πŸŽ‰ 75% of content is free forever β€” Unlock Premium from $10/mo β†’
CW
Search courses…
πŸ’Ό Servicesℹ️ Aboutβœ‰οΈ ContactView Pricing Plansfrom $10

Running Totals & Cumulative

Advanced SQLWindow Functions⭐ Premium

Advertisement

Running Totals & Cumulative

Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber

Basic Running Total

-- Simple running total
SELECT
  sale_date,
  revenue,
  SUM(revenue) OVER (
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_total
FROM daily_sales
ORDER BY sale_date;

ℹ️

Key Insight: Use ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW for a true running total. Without the frame clause, PostgreSQL defaults to RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW which includes peers.

Running Total by Partition

-- Running total per department
SELECT
  department_id,
  employee_id,
  salary,
  SUM(salary) OVER (
    PARTITION BY department_id
    ORDER BY hire_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS dept_running_total,
  SUM(salary) OVER (
    ORDER BY hire_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS company_running_total
FROM employees
ORDER BY department_id, hire_date;

Cumulative Percentage

-- Calculate cumulative percentage of total
SELECT
  department_id,
  employee_id,
  salary,
  SUM(salary) OVER w AS running_total,
  SUM(salary) OVER () AS grand_total,
  ROUND(
    SUM(salary) OVER w * 100.0 / SUM(salary) OVER (),
    2
  ) AS cumulative_pct
FROM employees
WINDOW w AS (
  ORDER BY salary DESC
  ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
)
ORDER BY salary DESC;

Running Average

-- Moving averages with different windows
SELECT
  sale_date,
  revenue,
  -- 7-day moving average
  AVG(revenue) OVER (
    ORDER BY sale_date
    ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
  ) AS ma_7day,
  -- 30-day moving average
  AVG(revenue) OVER (
    ORDER BY sale_date
    ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
  ) AS ma_30day,
  -- Centered moving average
  AVG(revenue) OVER (
    ORDER BY sale_date
    ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
  ) AS ma_centered_7day
FROM daily_sales
ORDER BY sale_date;

Cumulative Sum with Reset

-- Running total that resets at specific boundaries
SELECT
  department_id,
  employee_id,
  salary,
  hire_date,
  SUM(salary) OVER (
    PARTITION BY department_id
    ORDER BY hire_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS dept_running_total,
  -- Reset at month boundaries
  SUM(salary) OVER (
    PARTITION BY department_id, DATE_TRUNC('month', hire_date)
    ORDER BY hire_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS monthly_running_total
FROM employees
ORDER BY department_id, hire_date;

⚠️

Common Mistake: Using RANGE instead of ROWS in the frame clause can produce unexpected results when there are ties in the ORDER BY column. ROWS is generally safer for running totals.

BigQuery Running Totals

-- BigQuery running total syntax
SELECT
  date,
  revenue,
  SUM(revenue) OVER (
    ORDER BY date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_total,
  AVG(revenue) OVER (
    ORDER BY date
    ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
  ) AS ma_7day
FROM `project.dataset.daily_sales`
ORDER BY date;

Cumulative Distribution Function

-- CDF and inverse CDF calculations
SELECT
  employee_id,
  salary,
  CUME_DIST() OVER (ORDER BY salary) AS cumulative_distribution,
  PERCENT_RANK() OVER (ORDER BY salary) AS percentile_rank,
  NTILE(100) OVER (ORDER BY salary) AS percentile_bucket,
  SUM(COUNT(*)) OVER (
    ORDER BY salary
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) * 100.0 / SUM(COUNT(*)) OVER () AS manual_cdf
FROM employees
GROUP BY employee_id, salary
ORDER BY salary;

Running Count

-- Running count with various conditions
SELECT
  sale_date,
  revenue,
  COUNT(*) OVER (
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_count,
  COUNT(*) FILTER (WHERE revenue > 1000) OVER (
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_high_count,
  ROW_NUMBER() OVER (
    ORDER BY sale_date
  ) AS sequential_number
FROM daily_sales
ORDER BY sale_date;

Running Maximum and Minimum

-- Running min/max with window functions
SELECT
  sale_date,
  revenue,
  MAX(revenue) OVER (
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_max,
  MIN(revenue) OVER (
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_min,
  revenue - MIN(revenue) OVER (
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS above_running_min,
  MAX(revenue) OVER (
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) - revenue AS below_running_max
FROM daily_sales
ORDER BY sale_date;

Running Total with Gaps

-- Handle gaps in data with running total
WITH date_series AS (
  SELECT generate_series(
    MIN(sale_date),
    MAX(sale_date),
    '1 day'::INTERVAL
  )::DATE AS date
  FROM daily_sales
)
SELECT
  ds.date,
  COALESCE(s.revenue, 0) AS revenue,
  SUM(COALESCE(s.revenue, 0)) OVER (
    ORDER BY ds.date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_total
FROM date_series ds
LEFT JOIN daily_sales s ON ds.date = s.sale_date
ORDER BY ds.date;

Percentile Running Total

-- Running percentile calculation
WITH ranked AS (
  SELECT
    department_id,
    employee_id,
    salary,
    ROW_NUMBER() OVER (
      PARTITION BY department_id ORDER BY salary
    ) AS rn,
    COUNT(*) OVER (PARTITION BY department_id) AS total_count
  FROM employees
)
SELECT
  department_id,
  employee_id,
  salary,
  rn,
  total_count,
  ROUND(rn * 100.0 / total_count, 2) AS running_percentile
FROM ranked
ORDER BY department_id, salary;

Cumulative Moving Average

-- CMA: average of all values up to current point
SELECT
  sale_date,
  revenue,
  AVG(revenue) OVER (
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS cumulative_moving_avg,
  SUM(AVG(revenue)) OVER (
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS cumulative_avg_sum
FROM daily_sales
GROUP BY sale_date, revenue
ORDER BY sale_date;

Follow-Up Questions

  1. What's the difference between ROWS and RANGE in running total calculations?
  2. How would you calculate a running total that handles NULL values?
  3. Explain how to compute a running geometric mean.
  4. How do you reset running totals at specific intervals?
  5. What's the best approach for running totals in partitioned tables?
  6. How would you implement a running total with a custom aggregation function?

Advertisement