Running Totals & Cumulative
Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber
Basic Running Total
-- Simple running total
SELECT
sale_date,
revenue,
SUM(revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total
FROM daily_sales
ORDER BY sale_date;
βΉοΈ
Key Insight: Use ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW for a true running total. Without the frame clause, PostgreSQL defaults to RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW which includes peers.
Running Total by Partition
-- Running total per department
SELECT
department_id,
employee_id,
salary,
SUM(salary) OVER (
PARTITION BY department_id
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS dept_running_total,
SUM(salary) OVER (
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS company_running_total
FROM employees
ORDER BY department_id, hire_date;
Cumulative Percentage
-- Calculate cumulative percentage of total
SELECT
department_id,
employee_id,
salary,
SUM(salary) OVER w AS running_total,
SUM(salary) OVER () AS grand_total,
ROUND(
SUM(salary) OVER w * 100.0 / SUM(salary) OVER (),
2
) AS cumulative_pct
FROM employees
WINDOW w AS (
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
)
ORDER BY salary DESC;
Running Average
-- Moving averages with different windows
SELECT
sale_date,
revenue,
-- 7-day moving average
AVG(revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS ma_7day,
-- 30-day moving average
AVG(revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) AS ma_30day,
-- Centered moving average
AVG(revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
) AS ma_centered_7day
FROM daily_sales
ORDER BY sale_date;
Cumulative Sum with Reset
-- Running total that resets at specific boundaries
SELECT
department_id,
employee_id,
salary,
hire_date,
SUM(salary) OVER (
PARTITION BY department_id
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS dept_running_total,
-- Reset at month boundaries
SUM(salary) OVER (
PARTITION BY department_id, DATE_TRUNC('month', hire_date)
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS monthly_running_total
FROM employees
ORDER BY department_id, hire_date;
β οΈ
Common Mistake: Using RANGE instead of ROWS in the frame clause can produce unexpected results when there are ties in the ORDER BY column. ROWS is generally safer for running totals.
BigQuery Running Totals
-- BigQuery running total syntax
SELECT
date,
revenue,
SUM(revenue) OVER (
ORDER BY date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total,
AVG(revenue) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS ma_7day
FROM `project.dataset.daily_sales`
ORDER BY date;
Cumulative Distribution Function
-- CDF and inverse CDF calculations
SELECT
employee_id,
salary,
CUME_DIST() OVER (ORDER BY salary) AS cumulative_distribution,
PERCENT_RANK() OVER (ORDER BY salary) AS percentile_rank,
NTILE(100) OVER (ORDER BY salary) AS percentile_bucket,
SUM(COUNT(*)) OVER (
ORDER BY salary
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) * 100.0 / SUM(COUNT(*)) OVER () AS manual_cdf
FROM employees
GROUP BY employee_id, salary
ORDER BY salary;
Running Count
-- Running count with various conditions
SELECT
sale_date,
revenue,
COUNT(*) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_count,
COUNT(*) FILTER (WHERE revenue > 1000) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_high_count,
ROW_NUMBER() OVER (
ORDER BY sale_date
) AS sequential_number
FROM daily_sales
ORDER BY sale_date;
Running Maximum and Minimum
-- Running min/max with window functions
SELECT
sale_date,
revenue,
MAX(revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_max,
MIN(revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_min,
revenue - MIN(revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS above_running_min,
MAX(revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) - revenue AS below_running_max
FROM daily_sales
ORDER BY sale_date;
Running Total with Gaps
-- Handle gaps in data with running total
WITH date_series AS (
SELECT generate_series(
MIN(sale_date),
MAX(sale_date),
'1 day'::INTERVAL
)::DATE AS date
FROM daily_sales
)
SELECT
ds.date,
COALESCE(s.revenue, 0) AS revenue,
SUM(COALESCE(s.revenue, 0)) OVER (
ORDER BY ds.date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total
FROM date_series ds
LEFT JOIN daily_sales s ON ds.date = s.sale_date
ORDER BY ds.date;
Percentile Running Total
-- Running percentile calculation
WITH ranked AS (
SELECT
department_id,
employee_id,
salary,
ROW_NUMBER() OVER (
PARTITION BY department_id ORDER BY salary
) AS rn,
COUNT(*) OVER (PARTITION BY department_id) AS total_count
FROM employees
)
SELECT
department_id,
employee_id,
salary,
rn,
total_count,
ROUND(rn * 100.0 / total_count, 2) AS running_percentile
FROM ranked
ORDER BY department_id, salary;
Cumulative Moving Average
-- CMA: average of all values up to current point
SELECT
sale_date,
revenue,
AVG(revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cumulative_moving_avg,
SUM(AVG(revenue)) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cumulative_avg_sum
FROM daily_sales
GROUP BY sale_date, revenue
ORDER BY sale_date;
Follow-Up Questions
- What's the difference between
ROWSandRANGEin running total calculations? - How would you calculate a running total that handles NULL values?
- Explain how to compute a running geometric mean.
- How do you reset running totals at specific intervals?
- What's the best approach for running totals in partitioned tables?
- How would you implement a running total with a custom aggregation function?