Advanced Window Functions Deep Dive
Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber
Frame Clause Mastery
Window functions become powerful when you control exactly which rows participate in the calculation. The frame clause defines the window boundaries.
-- ROWS vs RANGE vs GROUPS frame types
SELECT
employee_id,
department,
salary,
hire_date,
-- ROWS frame: exactly N physical rows
AVG(salary) OVER (
PARTITION BY department
ORDER BY hire_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS avg_last_3_rows,
-- RANGE frame: logical value range
AVG(salary) OVER (
PARTITION BY department
ORDER BY hire_date
RANGE BETWEEN INTERVAL '90' DAY PRECEDING AND CURRENT ROW
) AS avg_salary_90_days,
-- GROUPS frame: peer groups
SUM(salary) OVER (
PARTITION BY department
ORDER BY hire_date
GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) AS sum_adjacent_groups
FROM employees;
βΉοΈ
Key Insight: ROWS counts physical rows, RANGE operates on values, and GROUPS counts peer groups (rows with identical ORDER BY values). The default frame when ORDER BY is present is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
Nested Window Functions
Combining multiple window functions in a single query for complex analytics.
-- Employee ranking with multiple analytical dimensions
WITH ranked AS (
SELECT
employee_id,
department,
salary,
hire_date,
-- Rank within department by salary
DENSE_RANK() OVER (
PARTITION BY department ORDER BY salary DESC
) AS dept_rank,
-- Percentile across entire company
PERCENT_RANK() OVER (
ORDER BY salary
) AS company_percentile,
-- Salary difference from department average
salary - AVG(salary) OVER (
PARTITION BY department
) AS diff_from_dept_avg,
-- Cumulative distribution within department
CUME_DIST() OVER (
PARTITION BY department ORDER BY salary
) AS dept_cumulative_dist
FROM employees
)
SELECT
employee_id,
department,
salary,
dept_rank,
ROUND(company_percentile * 100, 1) AS percentile,
diff_from_dept_avg,
ROUND(dept_cumulative_dist * 100, 1) AS cum_dist_pct
FROM ranked
WHERE dept_rank <= 3;
Window Functions with FILTER clause
-- Conditional aggregation within window frames
SELECT
department,
employee_id,
salary,
-- Average salary only considering senior employees
AVG(salary) FILTER (WHERE years_experience > 5)
OVER (PARTITION BY department) AS avg_senior_salary,
-- Count of bonuses awarded in department
COUNT(bonus_amount) FILTER (WHERE bonus_amount > 0)
OVER (PARTITION BY department) AS bonuses_awarded,
-- Ratio of current salary to filtered average
salary / NULLIF(
AVG(salary) FILTER (WHERE employment_status = 'active')
OVER (PARTITION BY department),
0
) AS salary_to_active_avg_ratio
FROM employees;
LEAD/LAG with Complex Offsets
-- Compare with non-adjacent rows using variable offsets
SELECT
store_id,
sale_date,
revenue,
-- Revenue from 7 days ago
LAG(revenue, 7) OVER (
PARTITION BY store_id ORDER BY sale_date
) AS revenue_7_days_ago,
-- Revenue from same day last year
LAG(revenue, 1, 0) OVER (
PARTITION BY store_id, EXTRACT(DOY FROM sale_date)
ORDER BY sale_date
) AS revenue_yoy_same_day,
-- Ratio to previous sale
revenue / NULLIF(
LAG(revenue) OVER (
PARTITION BY store_id ORDER BY sale_date
),
0
) AS growth_ratio,
-- Moving difference with lead
LEAD(revenue, 1, revenue) OVER (
PARTITION BY store_id ORDER BY sale_date
) - revenue AS next_sale_diff
FROM daily_sales;
β οΈ
Common Mistake: When using LAG/LEAD with complex PARTITION BY clauses containing expressions, ensure the expression is deterministic. Non-deterministic expressions in PARTITION BY can cause inconsistent results.
NTILE and Distribution Functions
-- Sophisticated data distribution analysis
SELECT
customer_id,
total_purchases,
NTILE(10) OVER (ORDER BY total_purchases DESC) AS decile,
-- Assign to buckets with minimum row counts
NTILE(4) OVER (
ORDER BY total_purchases
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS quartile,
-- Frequency distribution
WIDTH_BUCKET(total_purchases, 0, 10000, 20) AS purchase_bucket,
-- Percentile with interpolation
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_purchases)
OVER () AS p95_threshold
FROM customer_stats
WHERE order_count > 0;
First/Last Value with Custom Ordering
-- Get specific ordered values from window frames
SELECT
department,
employee_id,
salary,
hire_date,
-- Highest paid in department (explicit ordering)
FIRST_VALUE(employee_id) OVER (
PARTITION BY department
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS top_earner_id,
-- Lowest paid after current row
LAST_VALUE(salary) OVER (
PARTITION BY department
ORDER BY salary ASC
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
) AS lowest_remaining_salary,
-- NTH value in department
NTH_VALUE(employee_id, 3) OVER (
PARTITION BY department
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS third_highest_id
FROM employees;
Window Functions in UPDATE Statements
-- Update using window function calculations
UPDATE employee_salary_history
SET salary_band = sub.new_band
FROM (
SELECT
employee_id,
NTILE(5) OVER (ORDER BY salary) AS new_band
FROM employee_salary_history
WHERE effective_date = CURRENT_DATE
) sub
WHERE employee_salary_history.employee_id = sub.employee_id
AND employee_salary_history.effective_date = CURRENT_DATE;
-- PostgreSQL window function in INSERT
INSERT INTO department_rankings (department, rank_date, top_salary, avg_salary)
SELECT
department,
CURRENT_DATE,
FIRST_VALUE(salary) OVER w AS top_salary,
AVG(salary) OVER w AS avg_salary
FROM employees
WINDOW w AS (
PARTITION BY department
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ON CONFLICT (department, rank_date)
DO UPDATE SET
top_salary = EXCLUDED.top_salary,
avg_salary = EXCLUDED.avg_salary;
Advanced: Window Function with Self-Join Optimization
-- Efficient percentile calculation without self-join
WITH percentiles AS (
SELECT
department,
employee_id,
salary,
PERCENT_RANK() OVER (
PARTITION BY department ORDER BY salary
) AS pct_rank,
SUM(salary) OVER (
PARTITION BY department
ORDER BY salary
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cumulative_salary
FROM employees
)
SELECT
department,
employee_id,
salary,
ROUND(pct_rank * 100, 2) AS percentile,
cumulative_salary,
cumulative_salary / SUM(salary) OVER (PARTITION BY department) AS pct_of_dept_total
FROM percentiles
WHERE pct_rank BETWEEN 0.9 AND 1.0;
Follow-Up Questions
- When would you choose
RANGEoverROWSframe, and what are the performance implications? - How does PostgreSQL handle
NULLS FIRST/LASTin window function ORDER BY clauses? - Can you use window functions in a
WHEREclause? If not, what's the workaround? - What's the difference between
DENSE_RANK(),RANK(), andROW_NUMBER()when there are ties? - How would you compute a 7-day moving average excluding weekends using window functions?
- Explain the
EXCLUDEclause in window frames (e.g.,EXCLUDE CURRENT ROW,EXCLUDE TIES).