Interview Question: "Explain the difference between INSERT ON CONFLICT and MERGE. How do you implement idempotent upserts? What are the race conditions in concurrent upserts?" — Asked at Amazon, Netflix, Spotify for Backend Engineer roles
ℹ️
Difficulty: Advanced | Companies: Amazon, Netflix, Spotify, Uber, Airbnb | Time: 45-60 minutes
MERGE Statement (SQL:2008 Standard)
MERGE provides atomic INSERT/UPDATE/DELETE operations:
-- Create target table
CREATE TABLE product_inventory (
product_id INT PRIMARY KEY,
product_name VARCHAR(100),
quantity INT,
last_updated TIMESTAMP,
version INT DEFAULT 1
);
-- Create source table (staging)
CREATE TABLE inventory_staging (
product_id INT,
product_name VARCHAR(100),
quantity INT
);
-- Insert initial data
INSERT INTO product_inventory VALUES
(1, 'Laptop', 50, CURRENT_TIMESTAMP, 1),
(2, 'Mouse', 200, CURRENT_TIMESTAMP, 1),
(3, 'Keyboard', 150, CURRENT_TIMESTAMP, 1);
-- Insert staging data
INSERT INTO inventory_staging VALUES
(1, 'Laptop', 45), -- Update (decrease)
(2, 'Mouse', 180), -- Update (decrease)
(4, 'Monitor', 30); -- Insert (new)
-- Standard MERGE operation
MERGE INTO product_inventory AS target
USING inventory_staging AS source
ON target.product_id = source.product_id
WHEN MATCHED AND source.quantity != target.quantity THEN
UPDATE SET
quantity = source.quantity,
last_updated = CURRENT_TIMESTAMP,
version = target.version + 1
WHEN MATCHED AND source.quantity = target.quantity THEN
DO NOTHING
WHEN NOT MATCHED THEN
INSERT (product_id, product_name, quantity, last_updated)
VALUES (source.product_id, source.product_name, source.quantity, CURRENT_TIMESTAMP);
-- Verify results
SELECT * FROM product_inventory ORDER BY product_id;
Output:
| product_id | product_name | quantity | last_updated | version |
|---|---|---|---|---|
| 1 | Laptop | 45 | 2024-06-15 10:30:00 | 2 |
| 2 | Mouse | 180 | 2024-06-15 10:30:00 | 2 |
| 3 | Keyboard | 150 | 2024-01-01 10:00:00 | 1 |
| 4 | Monitor | 30 | 2024-06-15 10:30:00 | 1 |
PostgreSQL UPSERT (ON CONFLICT)
-- Basic ON CONFLICT DO UPDATE
INSERT INTO product_inventory (product_id, product_name, quantity, last_updated)
VALUES (1, 'Laptop', 40, CURRENT_TIMESTAMP)
ON CONFLICT (product_id) DO UPDATE
SET
quantity = EXCLUDED.quantity,
last_updated = EXCLUDED.last_updated,
version = product_inventory.version + 1;
-- ON CONFLICT DO NOTHING
INSERT INTO product_inventory (product_id, product_name, quantity, last_updated)
VALUES (5, 'Webcam', 25, CURRENT_TIMESTAMP)
ON CONFLICT DO NOTHING;
-- Conditional upsert
INSERT INTO product_inventory (product_id, product_name, quantity, last_updated)
VALUES (1, 'Laptop', 40, CURRENT_TIMESTAMP)
ON CONFLICT (product_id) DO UPDATE
SET
quantity = CASE
WHEN EXCLUDED.quantity > product_inventory.quantity
THEN EXCLUDED.quantity
ELSE product_inventory.quantity
END,
last_updated = CURRENT_TIMESTAMP
WHERE EXCLUDED.quantity > product_inventory.quantity;
-- Upsert with subquery
INSERT INTO product_inventory (product_id, product_name, quantity, last_updated)
SELECT
product_id,
product_name,
quantity,
CURRENT_TIMESTAMP
FROM inventory_staging
ON CONFLICT (product_id) DO UPDATE
SET
quantity = EXCLUDED.quantity,
last_updated = EXCLUDED.last_updated;
MySQL INSERT ... ON DUPLICATE KEY UPDATE
-- MySQL syntax
INSERT INTO product_inventory (product_id, product_name, quantity, last_updated)
VALUES (1, 'Laptop', 40, CURRENT_TIMESTAMP)
ON DUPLICATE KEY UPDATE
quantity = VALUES(quantity),
last_updated = VALUES(last_updated),
version = version + 1;
-- With IF function
INSERT INTO product_inventory (product_id, product_name, quantity, last_updated)
VALUES (1, 'Laptop', 40, CURRENT_TIMESTAMP)
ON DUPLICATE KEY UPDATE
quantity = IF(VALUES(quantity) > quantity, VALUES(quantity), quantity),
last_updated = CURRENT_TIMESTAMP;
Idempotent Upsert Pattern
-- Create idempotency key table
CREATE TABLE idempotency_keys (
idempotency_key UUID PRIMARY KEY,
operation VARCHAR(100),
result JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
expires_at TIMESTAMP
);
-- Idempotent upsert function
CREATE OR REPLACE FUNCTION idempotent_upsert(
p_key UUID,
p_operation VARCHAR,
p_data JSONB
) RETURNS JSONB AS $$
DECLARE
v_result JSONB;
v_existing JSONB;
BEGIN
-- Check if key exists and not expired
SELECT result INTO v_existing
FROM idempotency_keys
WHERE idempotency_key = p_key
AND expires_at > CURRENT_TIMESTAMP;
IF FOUND THEN
-- Return cached result
RETURN v_existing;
END IF;
-- Execute operation
BEGIN
-- Your business logic here
v_result := jsonb_build_object(
'status', 'success',
'data', p_data,
'processed_at', NOW()
);
-- Store result
INSERT INTO idempotency_keys (idempotency_key, operation, result, expires_at)
VALUES (p_key, p_operation, v_result, CURRENT_TIMESTAMP + INTERVAL '24 hours');
RETURN v_result;
EXCEPTION
WHEN OTHERS THEN
v_result := jsonb_build_object(
'status', 'error',
'error', SQLERRM,
'processed_at', NOW()
);
RETURN v_result;
END;
END;
$$ LANGUAGE plpgsql;
Concurrent Upsert Race Conditions
-- Problem: Race condition without proper locking
-- Session 1:
BEGIN;
SELECT quantity FROM product_inventory WHERE product_id = 1;
-- Returns 45
-- Session 2:
BEGIN;
SELECT quantity FROM product_inventory WHERE product_id = 1;
-- Also returns 45
-- Session 1:
UPDATE product_inventory SET quantity = 45 - 10 WHERE product_id = 1;
COMMIT;
-- Session 2:
UPDATE product_inventory SET quantity = 45 - 5 WHERE product_id = 1;
COMMIT;
-- Result: quantity = 40, but should be 30!
-- Solution: Use SELECT FOR UPDATE
BEGIN;
SELECT quantity FROM product_inventory WHERE product_id = 1 FOR UPDATE;
-- Now Session 2 will block until Session 1 commits
UPDATE product_inventory SET quantity = quantity - 10 WHERE product_id = 1;
COMMIT;
Atomic Counter Pattern
-- Atomic increment/decrement
UPDATE product_inventory
SET quantity = quantity - 10
WHERE product_id = 1 AND quantity >= 10
RETURNING *;
-- With retry logic
DO $$
DECLARE
v_attempts INT := 0;
v_max_attempts INT := 3;
v_success BOOLEAN := FALSE;
BEGIN
WHILE v_attempts < v_max_attempts AND NOT v_success LOOP
BEGIN
UPDATE product_inventory
SET quantity = quantity - 10
WHERE product_id = 1 AND quantity >= 10;
IF FOUND THEN
v_success := TRUE;
ELSE
RAISE EXCEPTION 'Insufficient inventory';
END IF;
EXCEPTION
WHEN serialization_failure OR deadlock_detected THEN
v_attempts := v_attempts + 1;
PERFORM pg_sleep(random() * 0.1);
END;
END LOOP;
IF NOT v_success THEN
RAISE EXCEPTION 'Failed after % attempts', v_max_attempts;
END IF;
END $$;
Bulk UPSERT Pattern
-- Efficient bulk upsert
INSERT INTO product_inventory (product_id, product_name, quantity, last_updated)
SELECT
s.product_id,
s.product_name,
s.quantity,
CURRENT_TIMESTAMP
FROM inventory_staging s
ON CONFLICT (product_id) DO UPDATE
SET
product_name = EXCLUDED.product_name,
quantity = EXCLUDED.quantity,
last_updated = EXCLUDED.last_updated,
version = product_inventory.version + 1;
-- With batch processing
DO $$
DECLARE
v_batch_size INT := 1000;
v_rows_processed INT;
BEGIN
LOOP
INSERT INTO product_inventory (product_id, product_name, quantity, last_updated)
SELECT
s.product_id,
s.product_name,
s.quantity,
CURRENT_TIMESTAMP
FROM inventory_staging s
WHERE s.product_id NOT IN (
SELECT product_id FROM product_inventory
LIMIT v_batch_size
)
ON CONFLICT (product_id) DO UPDATE
SET
quantity = EXCLUDED.quantity,
last_updated = EXCLUDED.last_updated;
GET DIAGNOSTICS v_rows_processed = ROW_COUNT;
EXIT WHEN v_rows_processed = 0;
RAISE NOTICE 'Processed % rows', v_rows_processed;
COMMIT;
END LOOP;
END $$;
MERGE with Complex Logic
-- MERGE with multiple conditions
MERGE INTO product_inventory AS target
USING inventory_staging AS source
ON target.product_id = source.product_id
WHEN MATCHED AND source.quantity < target.quantity THEN
UPDATE SET
quantity = target.quantity - (target.quantity - source.quantity),
last_updated = CURRENT_TIMESTAMP,
version = target.version + 1
WHEN MATCHED AND source.quantity > target.quantity THEN
UPDATE SET
quantity = source.quantity,
last_updated = CURRENT_TIMESTAMP,
version = target.version + 1
WHEN MATCHED AND source.quantity = target.quantity THEN
DO NOTHING
WHEN NOT MATCHED AND source.quantity > 0 THEN
INSERT (product_id, product_name, quantity, last_updated)
VALUES (source.product_id, source.product_name, source.quantity, CURRENT_TIMESTAMP)
WHEN NOT MATCHED AND source.quantity <= 0 THEN
DO NOTHING;
Mathematical Properties
Idempotency property:
Upsert semantics:
Atomicity guarantee:
ℹ️
Performance Tip: For bulk upserts, batch your operations (1000-5000 rows) to balance lock duration and transaction overhead.
Conflict Resolution Strategies
| Strategy | Use Case | Implementation |
|---|---|---|
| Last Write Wins | Simple scenarios | ON CONFLICT DO UPDATE SET ... = EXCLUDED. |
| First Write Wins | Preserve original | ON CONFLICT DO NOTHING |
| Merge | Combine values | Custom logic in UPDATE |
| Reject | Strict validation | RAISE EXCEPTION |
| Version | Optimistic locking | WHERE version = EXCLUDED.version |
-- Version-based conflict detection
INSERT INTO product_inventory (product_id, product_name, quantity, last_updated, version)
VALUES (1, 'Laptop', 40, CURRENT_TIMESTAMP, 1)
ON CONFLICT (product_id) DO UPDATE
SET
quantity = EXCLUDED.quantity,
last_updated = EXCLUDED.last_updated,
version = product_inventory.version + 1
WHERE product_inventory.version = EXCLUDED.version;
-- If version doesn't match, no update occurs
-- This prevents lost updates in concurrent scenarios
⚠️
Common Pitfall: Using EXCLUDED.* without checking can lead to overwriting changes made by other transactions. Always consider versioning for critical data.