🎉 75% of content is free forever — Unlock Premium from $10/mo →
CW
Search courses…
💼 Servicesℹ️ About✉️ ContactView Pricing Plansfrom $10

JSON in SQL: JSON_TABLE, Path Queries, Indexing JSONB

Advanced SQLJSON Operations⭐ Premium

Advertisement

Interview Question: "What's the difference between JSON and JSONB in PostgreSQL? How do you index JSON fields? Explain JSON_TABLE and its use cases." — Asked at MongoDB, Couchbase, Elastic for Senior Data Engineer roles

ℹ️

Difficulty: Advanced | Companies: MongoDB, Couchbase, Elastic, Stripe, Shopify | Time: 45-60 minutes

JSON Data Types

PostgreSQL offers two JSON types:

JSON=Text storage with validation\text{JSON} = \text{Text storage with validation}
JSONB=Binary storage with indexing\text{JSONB} = \text{Binary storage with indexing}
-- Create table with JSON data
CREATE TABLE user_events (
    event_id SERIAL PRIMARY KEY,
    user_id INT,
    event_type VARCHAR(50),
    event_data JSONB,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Insert JSON data
INSERT INTO user_events (user_id, event_type, event_data) VALUES
(1, 'purchase', '{
    "product_id": 123,
    "product_name": "Laptop",
    "price": 999.99,
    "quantity": 1,
    "payment": {
        "method": "credit_card",
        "last_four": "4242",
        "currency": "USD"
    },
    "shipping": {
        "address": {
            "street": "123 Main St",
            "city": "San Francisco",
            "state": "CA",
            "zip": "94102"
        },
        "method": "express",
        "cost": 15.00
    },
    "tags": ["electronics", "computers", "sale"]
}'),
(2, 'page_view', '{
    "page": "/products",
    "referrer": "google.com",
    "duration_seconds": 45,
    "device": {
        "type": "mobile",
        "os": "iOS",
        "browser": "Safari"
    },
    "location": {
        "country": "US",
        "city": "New York"
    }
}'),
(3, 'signup', '{
    "method": "email",
    "plan": "premium",
    "features": ["analytics", "api_access", "priority_support"],
    "referral": {
        "source": "friend",
        "code": "FRIEND2024"
    }
}');

JSON Path Expressions

PostgreSQL uses JSONB path expressions:

Path=$accessoraccessor...\text{Path} = \$ \cdot \text{accessor} \cdot \text{accessor} \cdot ...
-- Basic JSON access operators
SELECT 
    event_data -> 'product_id' AS product_id_json,
    event_data ->> 'product_id' AS product_id_text,
    event_data -> 'payment' ->> 'method' AS payment_method,
    event_data #>> '{shipping,address,city}' AS city
FROM user_events
WHERE event_type = 'purchase';

-- JSON path expressions (PostgreSQL 12+)
SELECT 
    event_data @? '$.payment.method ? (@ == "credit_card")' AS is_credit_card,
    event_data @@ '$.price > 500' AS is_expensive,
    event_data ->> '$.tags[0]' AS first_tag,
    event_data ->> '$.tags[*]' AS all_tags
FROM user_events
WHERE event_type = 'purchase';

Output:

product_id_jsonproduct_id_textpayment_methodcity
123123credit_cardSan Francisco

JSON_TABLE Equivalent

PostgreSQL doesn't have JSON_TABLE (Oracle/MySQL feature), but we can emulate it:

-- Emulate JSON_TABLE with LATERAL JOIN
SELECT 
    u.event_id,
    u.user_id,
    p.product_id,
    p.product_name,
    p.price,
    p.quantity
FROM user_events u
CROSS JOIN LATERAL jsonb_to_recordset(u.event_data->'items') AS p(
    product_id INT,
    product_name TEXT,
    price DECIMAL,
    quantity INT
)
WHERE u.event_type = 'purchase';

-- Unnest JSON arrays
SELECT 
    event_id,
    user_id,
    jsonb_array_elements_text(event_data->'tags') AS tag
FROM user_events
WHERE event_type = 'purchase';

-- With ordinality for array position
SELECT 
    event_id,
    user_id,
    t.tag,
    t.ordinality AS position
FROM user_events,
LATERAL jsonb_array_elements_text(event_data->'tags') WITH ORDINALITY AS t(tag, ordinality)
WHERE event_type = 'purchase';

Output:

event_iduser_idtagposition
11electronics1
11computers2
11sale3

JSON Aggregation

-- Aggregate rows into JSON array
SELECT 
    event_type,
    jsonb_agg(event_data) AS events
FROM user_events
GROUP BY event_type;

-- Aggregate into JSON object
SELECT 
    user_id,
    jsonb_object_agg(event_type, event_data) AS user_events
FROM user_events
GROUP BY user_id;

-- Build JSON from scratch
SELECT 
    jsonb_build_object(
        'user_id', user_id,
        'event_count', COUNT(*),
        'event_types', jsonb_agg(DISTINCT event_type)
    ) AS user_summary
FROM user_events
GROUP BY user_id;

Output:

{
  "user_id": 1,
  "event_count": 1,
  "event_types": ["purchase"]
}

JSONB Indexing

-- GIN index for containment queries
CREATE INDEX idx_events_gin ON user_events USING gin(event_data);

-- Specific path index
CREATE INDEX idx_events_product ON user_events USING btree((event_data->>'product_id'));

-- Expression index for common queries
CREATE INDEX idx_events_price ON user_events USING btree(
    ((event_data->>'price')::decimal)
) WHERE event_type = 'purchase';

-- Check index usage
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM user_events 
WHERE event_data @> '{"event_type": "purchase"}';

EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM user_events 
WHERE event_data->>'product_id' = '123';

-- Compare query plans
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM user_events 
WHERE event_data->'payment'->>'method' = 'credit_card';

JSON Transformation Functions

-- Convert JSON to record
SELECT *
FROM jsonb_to_record(event_data) AS x(
    product_id INT,
    product_name TEXT,
    price DECIMAL
)
WHERE event_type = 'purchase';

-- Convert JSON array to rows
SELECT *
FROM jsonb_to_recordset('[{"id": 1, "name": "A"}, {"id": 2, "name": "B"}]') AS x(
    id INT,
    name TEXT
);

-- Flatten nested JSON
SELECT 
    event_id,
    event_data->>'user_id' AS user_id,
    event_data->'shipping'->'address'->>'street' AS street,
    event_data->'shipping'->'address'->>'city' AS city
FROM user_events
WHERE event_type = 'purchase';

-- Pretty print JSON
SELECT jsonb_pretty(event_data)
FROM user_events
WHERE event_id = 1;

JSON Query Patterns

-- Filter by nested value
SELECT * FROM user_events
WHERE event_data @> '{
    "payment": {"method": "credit_card"}
}';

-- Array contains
SELECT * FROM user_events
WHERE event_data->'tags' @> '["electronics"]';

-- Multiple conditions
SELECT * FROM user_events
WHERE event_data->>'event_type' = 'purchase'
    AND (event_data->>'price')::decimal > 500
    AND event_data->'shipping'->>'method' = 'express';

-- Regex on JSON values
SELECT * FROM user_events
WHERE event_data->>'product_name' ~* 'laptop';

-- Exists check
SELECT * FROM user_events
WHERE event_data ? 'payment';

-- Any key exists
SELECT * FROM user_events
WHERE event_data ?| ARRAY['payment', 'shipping'];

-- All keys exist
SELECT * FROM user_events
WHERE event_data ?& ARRAY['payment', 'shipping'];

JSON Modification

-- Add field
UPDATE user_events
SET event_data = event_data || '{"processed": true}'::jsonb
WHERE event_id = 1;

-- Remove field
UPDATE user_events
SET event_data = event_data - 'tags'
WHERE event_id = 1;

-- Set nested field
UPDATE user_events
SET event_data = jsonb_set(
    event_data,
    '{payment,currency}',
    '"EUR"'
)
WHERE event_id = 1;

-- Delete nested field
UPDATE user_events
SET event_data = event_data #- '{shipping,address}'
WHERE event_id = 1;

-- Merge JSON objects
UPDATE user_events
SET event_data = event_data || jsonb_build_object(
    'metadata', jsonb_build_object(
        'updated_at', NOW(),
        'version', (event_data->>'version')::int + 1
    )
)
WHERE event_id = 1;

JSON Statistics and Analysis

-- Count distinct values in JSON
SELECT 
    event_type,
    jsonb_object_keys(event_data) AS key,
    COUNT(*) AS occurrences
FROM user_events
GROUP BY event_type, key
ORDER BY event_type, occurrences DESC;

-- Extract and aggregate
SELECT 
    event_type,
    AVG((event_data->>'price')::decimal) AS avg_price,
    SUM((event_data->>'price')::decimal) AS total_revenue
FROM user_events
WHERE event_data ? 'price'
GROUP BY event_type;

-- JSON histogram
SELECT 
    width_bucket(
        (event_data->>'price')::decimal,
        0, 2000, 10
    ) AS price_bucket,
    COUNT(*) AS frequency
FROM user_events
WHERE event_data ? 'price'
GROUP BY price_bucket
ORDER BY price_bucket;

JSON Performance Optimization

-- Check JSONB storage size
SELECT 
    event_id,
    pg_column_size(event_data) AS bytes,
    pg_size_pretty(pg_column_size(event_data)) AS size,
    jsonb_object_keys(event_data)::text[] AS keys
FROM user_events;

-- JSONB compression
SELECT 
    event_id,
    pg_column_size(event_data) AS original_size,
    pg_column_size(to_jsonb(event_data)) AS compressed_size
FROM user_events;

-- Avoid JSON in hot paths
-- Instead of:
-- SELECT event_data->>'field' FROM events WHERE event_data->>'field' = 'value';
-- Use:
-- SELECT field FROM events WHERE field = 'value';

Mathematical Formulas

JSON path evaluation cost:

Cpath=O(d)where d=depth of pathC_{path} = O(d) \quad \text{where } d = \text{depth of path}

GIN index lookup cost:

Cgin=O(logn+k)where k=matching documentsC_{gin} = O(\log n + k) \quad \text{where } k = \text{matching documents}

JSON containment check:

AB    kA:kBA[k]=B[k]A \subseteq B \iff \forall k \in A: k \in B \wedge A[k] = B[k]

ℹ️

Pro Tip: Use JSONB for query performance, JSON for storage. Convert between them with ::jsonb and ::json.

JSON Schema Validation

-- Create validation function
CREATE OR REPLACE FUNCTION validate_event_data()
RETURNS TRIGGER AS $$
BEGIN
    -- Check required fields
    IF NOT (NEW.event_data ? 'event_type') THEN
        RAISE EXCEPTION 'Missing event_type field';
    END IF;
    
    -- Validate specific types
    IF NEW.event_type = 'purchase' THEN
        IF NOT (NEW.event_data ? 'product_id' AND 
                jsonb_typeof(NEW.event_data->'product_id') = 'number') THEN
            RAISE EXCEPTION 'product_id must be a number';
        END IF;
        
        IF NOT (NEW.event_data ? 'price' AND 
                (NEW.event_data->>'price')::decimal > 0) THEN
            RAISE EXCEPTION 'price must be positive';
        END IF;
    END IF;
    
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER validate_event
BEFORE INSERT OR UPDATE ON user_events
FOR EACH ROW EXECUTE FUNCTION validate_event_data();

⚠️

JSONB Pitfalls: JSONB doesn't preserve duplicate keys. Use JSON type if you need to preserve duplicates.

Advertisement