SQL Interview Problem Patterns
Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber
Top N Per Group
-- Find top 3 earners per department
SELECT
department_id,
employee_id,
name,
salary,
DENSE_RANK() OVER (
PARTITION BY department_id ORDER BY salary DESC
) AS rank
FROM employees
QUALIFY rank <= 3; -- BigQuery syntax
-- PostgreSQL alternative
SELECT *
FROM (
SELECT
department_id,
employee_id,
name,
salary,
ROW_NUMBER() OVER (
PARTITION BY department_id ORDER BY salary DESC
) AS rn
FROM employees
) sub
WHERE rn <= 3;
βΉοΈ
Key Insight: Use ROW_NUMBER() for exactly N rows, DENSE_RANK() to include ties, and RANK() to skip ranks after ties. BigQuery's QUALIFY clause simplifies filtering window function results.
Running Total with Reset
-- Running total that resets at specific boundaries
SELECT
department_id,
employee_id,
salary,
hire_date,
SUM(salary) OVER (
PARTITION BY department_id
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS dept_running_total,
SUM(salary) OVER (
PARTITION BY department_id, DATE_TRUNC('month', hire_date)
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS monthly_running_total
FROM employees
ORDER BY department_id, hire_date;
Gap and Island
-- Find consecutive days of activity
WITH numbered AS (
SELECT
user_id,
activity_date,
activity_date - ROW_NUMBER() OVER (
PARTITION BY user_id ORDER BY activity_date
)::INT AS island_id
FROM user_activity
)
SELECT
user_id,
MIN(activity_date) AS streak_start,
MAX(activity_date) AS streak_end,
COUNT(*) AS streak_length
FROM numbered
GROUP BY user_id, island_id
HAVING COUNT(*) >= 3
ORDER BY user_id, streak_start;
Pivoting Data
-- Convert rows to columns
SELECT
department_id,
SUM(CASE WHEN EXTRACT(QUARTER FROM hire_date) = 1 THEN 1 ELSE 0 END) AS q1_hires,
SUM(CASE WHEN EXTRACT(QUARTER FROM hire_date) = 2 THEN 1 ELSE 0 END) AS q2_hires,
SUM(CASE WHEN EXTRACT(QUARTER FROM hire_date) = 3 THEN 1 ELSE 0 END) AS q3_hires,
SUM(CASE WHEN EXTRACT(QUARTER FROM hire_date) = 4 THEN 1 ELSE 0 END) AS q4_hires
FROM employees
WHERE EXTRACT(YEAR FROM hire_date) = 2024
GROUP BY department_id;
Sessionization
-- Group events into sessions (30-min gap = new session)
WITH events_with_gap AS (
SELECT
user_id,
event_time,
LAG(event_time) OVER (
PARTITION BY user_id ORDER BY event_time
) AS prev_event_time
FROM events
),
session_assignments AS (
SELECT
*,
SUM(
CASE
WHEN event_time - prev_event_time > INTERVAL '30 minutes'
OR prev_event_time IS NULL
THEN 1
ELSE 0
END
) OVER (
PARTITION BY user_id ORDER BY event_time
) AS session_id
FROM events_with_gap
)
SELECT
user_id,
session_id,
MIN(event_time) AS session_start,
MAX(event_time) AS session_end,
COUNT(*) AS event_count
FROM session_assignments
GROUP BY user_id, session_id;
Median Calculation
-- Calculate median
SELECT
department_id,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary) AS median_salary
FROM employees
GROUP BY department_id;
-- Median with interpolation
SELECT
department_id,
PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY salary) AS median_salary
FROM employees
GROUP BY department_id;
Year-over-Year Growth
-- Calculate YoY growth
WITH yearly_sales AS (
SELECT
EXTRACT(YEAR FROM order_date) AS year,
SUM(amount) AS total_sales
FROM orders
GROUP BY 1
)
SELECT
year,
total_sales,
LAG(total_sales) OVER (ORDER BY year) AS prev_year_sales,
ROUND(
(total_sales - LAG(total_sales) OVER (ORDER BY year)) * 100.0 /
NULLIF(LAG(total_sales) OVER (ORDER BY year), 0),
2
) AS yoy_growth_pct
FROM yearly_sales
ORDER BY year;
Consecutive Numbers
-- Find numbers appearing at least 3 times consecutively
SELECT DISTINCT num AS ConsecutiveNums
FROM (
SELECT
num,
LAG(num) OVER (ORDER BY id) AS prev1,
LAG(num, 2) OVER (ORDER BY id) AS prev2
FROM logs
) sub
WHERE num = prev1 AND num = prev2;
Department Statistics
-- Comprehensive department analysis
SELECT
d.department_name,
COUNT(e.employee_id) AS emp_count,
AVG(e.salary) AS avg_salary,
STDDEV(e.salary) AS salary_stddev,
MAX(e.salary) - MIN(e.salary) AS salary_range,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY e.salary) AS q1,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY e.salary) AS q3
FROM departments d
LEFT JOIN employees e ON d.department_id = e.department_id
GROUP BY d.department_name
HAVING COUNT(e.employee_id) > 5
ORDER BY avg_salary DESC;
Recursive Hierarchy
-- Traverse organizational hierarchy
WITH RECURSIVE org_chart AS (
SELECT
employee_id,
name,
manager_id,
1 AS level,
name::TEXT AS path
FROM employees
WHERE manager_id IS NULL
UNION ALL
SELECT
e.employee_id,
e.name,
e.manager_id,
oc.level + 1,
oc.path || ' β ' || e.name
FROM employees e
INNER JOIN org_chart oc ON e.manager_id = oc.employee_id
)
SELECT * FROM org_chart ORDER BY path;
Complex Aggregation
-- Multiple aggregations in single query
SELECT
department_id,
COUNT(*) AS total_employees,
COUNT(*) FILTER (WHERE salary > 80000) AS high_earners,
COUNT(*) FILTER (WHERE hire_date > CURRENT_DATE - INTERVAL '1 year') AS new_hires,
SUM(salary) AS total_salary,
AVG(salary) FILTER (WHERE status = 'active') AS avg_active_salary
FROM employees
GROUP BY department_id;
Window Function with FILTER
-- Conditional window calculations
SELECT
department_id,
employee_id,
salary,
AVG(salary) FILTER (WHERE years_experience > 5)
OVER (PARTITION BY department_id) AS avg_senior_salary,
COUNT(*) FILTER (WHERE performance_rating >= 4)
OVER (PARTITION BY department_id) AS high_performer_count
FROM employees;
Complex Joins
-- Self-join for comparison
SELECT
e1.employee_id,
e1.name,
e1.salary,
e2.name AS manager_name,
e2.salary AS manager_salary,
e1.salary - e2.salary AS salary_difference
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.employee_id;
-- Cross join for combinations
SELECT
p.product_name,
s.store_name,
COALESCE(i.quantity, 0) AS stock
FROM products p
CROSS JOIN stores s
LEFT JOIN inventory i ON p.product_id = i.product_id
AND s.store_id = i.store_id;
String Aggregation
-- Aggregate strings with ordering
SELECT
department_id,
STRING_AGG(name, ', ' ORDER BY salary DESC) AS employees_by_salary
FROM employees
GROUP BY department_id;
-- BigQuery equivalent
SELECT
department_id,
STRING_AGG(name, ', ' ORDER BY salary DESC) AS employees_by_salary
FROM employees
GROUP BY department_id;
Complex Date Calculations
-- Find first and last day of month
SELECT
employee_id,
hire_date,
DATE_TRUNC('month', hire_date)::DATE AS first_day_of_month,
(DATE_TRUNC('month', hire_date) + INTERVAL '1 month - 1 day')::DATE AS last_day_of_month
FROM employees;
-- Calculate business days
SELECT
order_id,
order_date,
ship_date,
(ship_date - order_date) AS calendar_days,
(ship_date - order_date) - (
EXTRACT(DOW FROM ship_date) - EXTRACT(DOW FROM order_date)
) / 7 * 2 AS business_days_approx
FROM orders;
Follow-Up Questions
- How would you optimize a query that's running slowly?
- What's the difference between WHERE and HAVING?
- Explain the concept of query plan and how to read it.
- How do you handle NULL values in SQL operations?
- What's the best approach for pagination in SQL?
- How would you implement a ranking system with ties?