Advanced Window Functions Deep Dive

Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber

Frame Clause Mastery

Window functions become powerful when you control exactly which rows participate in the calculation. The frame clause defines the window boundaries.

-- ROWS vs RANGE vs GROUPS frame types
SELECT
  employee_id,
  department,
  salary,
  hire_date,

  -- ROWS frame: exactly N physical rows
  AVG(salary) OVER (
    PARTITION BY department
    ORDER BY hire_date
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
  ) AS avg_last_3_rows,

  -- RANGE frame: logical value range
  AVG(salary) OVER (
    PARTITION BY department
    ORDER BY hire_date
    RANGE BETWEEN INTERVAL '90' DAY PRECEDING AND CURRENT ROW
  ) AS avg_salary_90_days,

  -- GROUPS frame: peer groups
  SUM(salary) OVER (
    PARTITION BY department
    ORDER BY hire_date
    GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING
  ) AS sum_adjacent_groups
FROM employees;

ℹ️

Key Insight: ROWS counts physical rows, RANGE operates on values, and GROUPS counts peer groups (rows with identical ORDER BY values). The default frame when ORDER BY is present is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.

Nested Window Functions

Combining multiple window functions in a single query for complex analytics.

-- Employee ranking with multiple analytical dimensions
WITH ranked AS (
  SELECT
    employee_id,
    department,
    salary,
    hire_date,

    -- Rank within department by salary
    DENSE_RANK() OVER (
      PARTITION BY department ORDER BY salary DESC
    ) AS dept_rank,

    -- Percentile across entire company
    PERCENT_RANK() OVER (
      ORDER BY salary
    ) AS company_percentile,

    -- Salary difference from department average
    salary - AVG(salary) OVER (
      PARTITION BY department
    ) AS diff_from_dept_avg,

    -- Cumulative distribution within department
    CUME_DIST() OVER (
      PARTITION BY department ORDER BY salary
    ) AS dept_cumulative_dist
  FROM employees
)
SELECT
  employee_id,
  department,
  salary,
  dept_rank,
  ROUND(company_percentile * 100, 1) AS percentile,
  diff_from_dept_avg,
  ROUND(dept_cumulative_dist * 100, 1) AS cum_dist_pct
FROM ranked
WHERE dept_rank <= 3;

Window Functions with FILTER clause

-- Conditional aggregation within window frames
SELECT
  department,
  employee_id,
  salary,

  -- Average salary only considering senior employees
  AVG(salary) FILTER (WHERE years_experience > 5)
    OVER (PARTITION BY department) AS avg_senior_salary,

  -- Count of bonuses awarded in department
  COUNT(bonus_amount) FILTER (WHERE bonus_amount > 0)
    OVER (PARTITION BY department) AS bonuses_awarded,

  -- Ratio of current salary to filtered average
  salary / NULLIF(
    AVG(salary) FILTER (WHERE employment_status = 'active')
      OVER (PARTITION BY department),
    0
  ) AS salary_to_active_avg_ratio
FROM employees;

LEAD/LAG with Complex Offsets

-- Compare with non-adjacent rows using variable offsets
SELECT
  store_id,
  sale_date,
  revenue,

  -- Revenue from 7 days ago
  LAG(revenue, 7) OVER (
    PARTITION BY store_id ORDER BY sale_date
  ) AS revenue_7_days_ago,

  -- Revenue from same day last year
  LAG(revenue, 1, 0) OVER (
    PARTITION BY store_id, EXTRACT(DOY FROM sale_date)
    ORDER BY sale_date
  ) AS revenue_yoy_same_day,

  -- Ratio to previous sale
  revenue / NULLIF(
    LAG(revenue) OVER (
      PARTITION BY store_id ORDER BY sale_date
    ),
    0
  ) AS growth_ratio,

  -- Moving difference with lead
  LEAD(revenue, 1, revenue) OVER (
    PARTITION BY store_id ORDER BY sale_date
  ) - revenue AS next_sale_diff
FROM daily_sales;

⚠️

Common Mistake: When using LAG/LEAD with complex PARTITION BY clauses containing expressions, ensure the expression is deterministic. Non-deterministic expressions in PARTITION BY can cause inconsistent results.

NTILE and Distribution Functions

-- Sophisticated data distribution analysis
SELECT
  customer_id,
  total_purchases,
  NTILE(10) OVER (ORDER BY total_purchases DESC) AS decile,

  -- Assign to buckets with minimum row counts
  NTILE(4) OVER (
    ORDER BY total_purchases
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
  ) AS quartile,

  -- Frequency distribution
  WIDTH_BUCKET(total_purchases, 0, 10000, 20) AS purchase_bucket,

  -- Percentile with interpolation
  PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_purchases)
    OVER () AS p95_threshold
FROM customer_stats
WHERE order_count > 0;

First/Last Value with Custom Ordering

-- Get specific ordered values from window frames
SELECT
  department,
  employee_id,
  salary,
  hire_date,

  -- Highest paid in department (explicit ordering)
  FIRST_VALUE(employee_id) OVER (
    PARTITION BY department
    ORDER BY salary DESC
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
  ) AS top_earner_id,

  -- Lowest paid after current row
  LAST_VALUE(salary) OVER (
    PARTITION BY department
    ORDER BY salary ASC
    ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
  ) AS lowest_remaining_salary,

  -- NTH value in department
  NTH_VALUE(employee_id, 3) OVER (
    PARTITION BY department
    ORDER BY salary DESC
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
  ) AS third_highest_id
FROM employees;

Window Functions in UPDATE Statements

-- Update using window function calculations
UPDATE employee_salary_history
SET salary_band = sub.new_band
FROM (
  SELECT
    employee_id,
    NTILE(5) OVER (ORDER BY salary) AS new_band
  FROM employee_salary_history
  WHERE effective_date = CURRENT_DATE
) sub
WHERE employee_salary_history.employee_id = sub.employee_id
  AND employee_salary_history.effective_date = CURRENT_DATE;

-- PostgreSQL window function in INSERT
INSERT INTO department_rankings (department, rank_date, top_salary, avg_salary)
SELECT
  department,
  CURRENT_DATE,
  FIRST_VALUE(salary) OVER w AS top_salary,
  AVG(salary) OVER w AS avg_salary
FROM employees
WINDOW w AS (
  PARTITION BY department
  ORDER BY salary DESC
  ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ON CONFLICT (department, rank_date)
DO UPDATE SET
  top_salary = EXCLUDED.top_salary,
  avg_salary = EXCLUDED.avg_salary;

Advanced: Window Function with Self-Join Optimization

-- Efficient percentile calculation without self-join
WITH percentiles AS (
  SELECT
    department,
    employee_id,
    salary,
    PERCENT_RANK() OVER (
      PARTITION BY department ORDER BY salary
    ) AS pct_rank,
    SUM(salary) OVER (
      PARTITION BY department
      ORDER BY salary
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_salary
  FROM employees
)
SELECT
  department,
  employee_id,
  salary,
  ROUND(pct_rank * 100, 2) AS percentile,
  cumulative_salary,
  cumulative_salary / SUM(salary) OVER (PARTITION BY department) AS pct_of_dept_total
FROM percentiles
WHERE pct_rank BETWEEN 0.9 AND 1.0;

Follow-Up Questions

When would you choose RANGE over ROWS frame, and what are the performance implications?
How does PostgreSQL handle NULLS FIRST/LAST in window function ORDER BY clauses?
Can you use window functions in a WHERE clause? If not, what's the workaround?
What's the difference between DENSE_RANK(), RANK(), and ROW_NUMBER() when there are ties?
How would you compute a 7-day moving average excluding weekends using window functions?
Explain the EXCLUDE clause in window frames (e.g., EXCLUDE CURRENT ROW, EXCLUDE TIES).