LATERAL JOIN & CROSS APPLY
Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber
LATERAL JOIN Fundamentals
-- Get top 3 employees per department
SELECT
d.department_name,
e.*
FROM departments d
CROSS JOIN LATERAL (
SELECT
employee_id,
name,
salary,
hire_date
FROM employees
WHERE department_id = d.department_id
ORDER BY salary DESC
LIMIT 3
) e;
βΉοΈ
Key Insight: LATERAL allows the right side of a join to reference columns from the left side. It's like a correlated subquery in FROM clause. PostgreSQL uses CROSS JOIN LATERAL, SQL Server uses CROSS APPLY.
LATERAL vs Subquery Performance
-- Slow: Correlated subquery in SELECT
SELECT
department_id,
(SELECT MAX(salary) FROM employees e2
WHERE e2.department_id = e1.department_id) AS max_salary
FROM employees e1;
-- Fast: LATERAL JOIN approach
SELECT
d.department_id,
m.max_salary
FROM departments d
CROSS JOIN LATERAL (
SELECT MAX(salary) AS max_salary
FROM employees
WHERE department_id = d.department_id
) m;
LATERAL with Function Calls
-- Apply a function to each row
SELECT
u.user_id,
u.username,
g.geo_location,
p.product_name
FROM users u
CROSS JOIN LATERAL (
SELECT ST_SetSRID(ST_MakePoint(u.longitude, u.latitude), 4326) AS geo_location
) g
CROSS JOIN LATERAL (
SELECT name AS product_name
FROM products
ORDER BY ST_Distance(
g.geo_location,
ST_SetSRID(ST_MakePoint(products.longitude, products.latitude), 4326)
)
LIMIT 1
) p;
LEFT JOIN LATERAL for Optional Matches
-- Get most recent order per customer (including those with no orders)
SELECT
c.customer_id,
c.customer_name,
o.order_id,
o.order_date,
o.total_amount
FROM customers c
LEFT JOIN LATERAL (
SELECT order_id, order_date, total_amount
FROM orders
WHERE customer_id = c.customer_id
ORDER BY order_date DESC
LIMIT 1
) o ON true;
β οΈ
Important: When using LEFT JOIN LATERAL, always include ON true or ON false (for optional application). Without it, PostgreSQL may throw a syntax error.
CROSS APPLY (SQL Server Style)
-- SQL Server CROSS APPLY equivalent
SELECT
d.department_name,
e.top_earners
FROM departments d
CROSS APPLY (
SELECT TOP 3
employee_id,
name AS top_earners,
salary
FROM employees
WHERE department_id = d.department_id
ORDER BY salary DESC
) e;
LATERAL for JSON/Array Processing
-- Unnest JSON array per row
SELECT
order_id,
item.product_name,
item.quantity,
item.price
FROM orders
CROSS JOIN LATERAL (
SELECT
jsonb_array_elements(items) AS item_data
) raw
CROSS JOIN LATERAL (
SELECT
item_data->>'name' AS product_name,
(item_data->>'quantity')::INT AS quantity,
(item_data->>'price')::DECIMAL AS price
) item;
LATERAL for Running Calculations
-- Calculate running statistics per category
SELECT
p.product_id,
p.product_name,
d.sale_date,
d.quantity,
running_stats.avg_price,
running_stats.total_sold
FROM products p
CROSS JOIN LATERAL (
SELECT
AVG(price) AS avg_price,
SUM(quantity) AS total_sold
FROM sales
WHERE product_id = p.product_id
AND sale_date <= d.sale_date
) running_stats
INNER JOIN sales d ON p.product_id = d.product_id;
LATERAL with Window Functions
-- Get percentile ranks per group
SELECT
department_id,
employee_id,
salary,
dept_stats.median_salary,
dept_stats.std_dev
FROM employees
CROSS JOIN LATERAL (
SELECT
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary) AS median_salary,
STDDEV(salary) AS std_dev
FROM employees e2
WHERE e2.department_id = employees.department_id
) dept_stats;
Multiple LATERAL Joins
-- Chain multiple lateral joins for complex lookups
SELECT
o.order_id,
c.customer_name,
p.product_name,
s.shipping_address
FROM orders o
CROSS JOIN LATERAL (
SELECT customer_name
FROM customers
WHERE customer_id = o.customer_id
) c
CROSS JOIN LATERAL (
SELECT product_name
FROM products
WHERE product_id = o.product_id
) p
CROSS JOIN LATERAL (
SELECT shipping_address
FROM shipments
WHERE order_id = o.order_id
ORDER BY shipped_date DESC
LIMIT 1
) s;
LATERAL for Pivot-like Results
-- Dynamic column generation using LATERAL
SELECT
d.department_id,
d.department_name,
q1.q1_revenue,
q2.q2_revenue,
q3.q3_revenue,
q4.q4_revenue
FROM departments d
CROSS JOIN LATERAL (
SELECT SUM(revenue) AS q1_revenue
FROM sales
WHERE department_id = d.department_id
AND EXTRACT(QUARTER FROM sale_date) = 1
) q1
CROSS JOIN LATERAL (
SELECT SUM(revenue) AS q2_revenue
FROM sales
WHERE department_id = d.department_id
AND EXTRACT(QUARTER FROM sale_date) = 2
) q2
CROSS JOIN LATERAL (
SELECT SUM(revenue) AS q3_revenue
FROM sales
WHERE department_id = d.department_id
AND EXTRACT(QUARTER FROM sale_date) = 3
) q3
CROSS JOIN LATERAL (
SELECT SUM(revenue) AS q4_revenue
FROM sales
WHERE department_id = d.department_id
AND EXTRACT(QUARTER FROM sale_date) = 4
) q4;
LATERAL for Geospatial Queries
-- Find nearest N points of interest
SELECT
u.user_id,
u.user_name,
poi.poi_name,
poi.distance_meters
FROM users u
CROSS JOIN LATERAL (
SELECT
p.name AS poi_name,
ST_Distance(
ST_SetSRID(ST_MakePoint(u.longitude, u.latitude), 4326)::geography,
ST_SetSRID(ST_MakePoint(p.longitude, p.latitude), 4326)::geography
) AS distance_meters
FROM points_of_interest p
ORDER BY ST_Distance(
ST_SetSRID(ST_MakePoint(u.longitude, u.latitude), 4326)::geography,
ST_SetSRID(ST_MakePoint(p.longitude, p.latitude), 4326)::geography
)
LIMIT 5
) poi;
βΉοΈ
Optimization Tip: For geospatial LATERAL joins, create a GiST index on the geography column to avoid sequential scans. The index significantly improves performance for distance calculations.
Follow-Up Questions
- When would you use LATERAL JOIN instead of a correlated subquery?
- How does LATERAL JOIN interact with query optimizers differently than regular JOINs?
- What's the difference between
CROSS JOIN LATERALandLEFT JOIN LATERAL? - How would you optimize a LATERAL join that's processing millions of rows?
- Can you use LATERAL JOIN with window functions? What are the limitations?
- How do you handle NULL values in LATERAL JOIN results?