Interview Question: "Compare ARRAY vs JSONB for storing lists. When would you use Range types? Explain the performance implications of using complex types." β Asked at MongoDB, Couchbase, Databricks for Data Modeling roles
βΉοΈ
Difficulty: Advanced | Companies: MongoDB, Couchbase, Databricks, Snowflake, Google BigQuery | Time: 45-60 minutes
ARRAY Type
-- Create table with ARRAY columns
CREATE TABLE product_tags (
product_id SERIAL PRIMARY KEY,
product_name VARCHAR(100),
tags TEXT[],
prices DECIMAL(10,2)[],
attributes JSONB
);
INSERT INTO product_tags VALUES
(1, 'Laptop', ARRAY['electronics', 'computers', 'sale'], ARRAY[999.99, 899.99], '{"color": "silver", "weight": 2.5}'),
(2, 'Phone', ARRAY['electronics', 'mobile'], ARRAY[699.99, 599.99], '{"color": "black", "weight": 0.3}'),
(3, 'Book', ARRAY['education', 'reference'], ARRAY[29.99, 24.99], '{"pages": 500, "format": "hardcover"}');
-- Array operations
SELECT
product_name,
tags,
array_length(tags, 1) AS tag_count,
tags[1] AS first_tag,
array_append(tags, 'new_tag') AS with_new_tag,
array_remove(tags, 'sale') AS without_sale,
array_cat(tags, ARRAY['extra']) AS concatenated,
array_to_string(tags, ', ') AS tags_string
FROM product_tags;
-- Array containment
SELECT * FROM product_tags
WHERE tags @> ARRAY['electronics'];
-- Array overlap
SELECT * FROM product_tags
WHERE tags && ARRAY['electronics', 'education'];
-- Array uniqueness
SELECT * FROM product_tags
WHERE array_ndims(tags) = 1;
-- Unnest array to rows
SELECT
product_name,
UNNEST(tags) AS tag
FROM product_tags
ORDER BY product_name, tag;
Output:
| product_name | tag |
|---|---|
| Book | education |
| Book | reference |
| Laptop | computers |
| Laptop | electronics |
| Laptop | sale |
| Phone | electronics |
| Phone | mobile |
MAP Type (HStore)
-- PostgreSQL uses hstore for map-like structures
CREATE EXTENSION IF NOT EXISTS hstore;
CREATE TABLE user_profiles (
user_id SERIAL PRIMARY KEY,
name VARCHAR(100),
settings HSTORE,
preferences JSONB
);
INSERT INTO user_profiles VALUES
(1, 'Alice',
'theme => dark, language => en, notifications => true'::hstore,
'{"color": "blue", "font_size": 12}'::jsonb),
(2, 'Bob',
'theme => light, language => es'::hstore,
'{"color": "red", "font_size": 14}'::jsonb);
-- Hstore operations
SELECT
name,
settings,
settings->'theme' AS theme,
settings['language'] AS language,
settings ? 'notifications' AS has_notifications,
settings - 'theme' AS without_theme,
settings || 'font => large'::hstore AS with_new_key,
akeys(settings) AS all_keys,
avals(settings) AS all_values
FROM user_profiles;
-- Convert between hstore and jsonb
SELECT
name,
hstore_to_jsonb(settings) AS as_jsonb,
jsonb_to_hstore(preferences) AS as_hstore
FROM user_profiles;
STRUCT Type (Composite)
-- Create composite type
CREATE TYPE address AS (
street VARCHAR(100),
city VARCHAR(50),
state VARCHAR(2),
zip VARCHAR(10)
);
CREATE TYPE person_info AS (
name VARCHAR(100),
birth_date DATE,
home address,
work address
);
-- Use composite type
CREATE TABLE employees (
emp_id SERIAL PRIMARY KEY,
info person_info,
salary DECIMAL(12,2)
);
INSERT INTO employees VALUES
(1, ROW('Alice', '1990-01-01',
ROW('123 Main St', 'SF', 'CA', '94102'),
ROW('456 Work Ave', 'SF', 'CA', '94105')::address
)::person_info,
120000.00);
-- Access composite fields
SELECT
(info).name,
(info).birth_date,
(info).home,
(info).work,
((info).home).city AS home_city,
((info).work).city AS work_city
FROM employees;
-- Expand composite to columns
SELECT
(info).name AS name,
(info).home AS home_address
FROM employees;
-- Create table from composite
CREATE TABLE employee_addresses AS
SELECT
emp_id,
(info).name AS name,
((info).home).* AS home
FROM employees;
Range Types
-- PostgreSQL range types
CREATE TABLE reservations (
reservation_id SERIAL PRIMARY KEY,
room_id INT,
during TSRANGE,
guest_name VARCHAR(100)
);
INSERT INTO reservations VALUES
(1, 101, '[2024-06-01 14:00, 2024-06-05 11:00)'::tsrange, 'Alice'),
(2, 101, '[2024-06-06 14:00, 2024-06-10 11:00)'::tsrange, 'Bob'),
(3, 102, '[2024-06-01 14:00, 2024-06-03 11:00)'::tsrange, 'Charlie');
-- Range operations
SELECT
reservation_id,
room_id,
guest_name,
during,
lower(during) AS start_time,
upper(during) AS end_time,
upper(during) - lower(during) AS duration,
isempty(during) AS is_empty,
lower_inf(during) AS start_infinite,
upper_inf(during) AS end_infinite
FROM reservations;
-- Range containment
SELECT * FROM reservations
WHERE during @> '2024-06-02'::timestamp;
-- Range overlap
SELECT r1.*, r2.*
FROM reservations r1
JOIN reservations r2 ON r1.room_id = r2.room_id
AND r1.during && r2.during
WHERE r1.reservation_id < r2.reservation_id;
-- Range adjacency
SELECT * FROM reservations
WHERE room_id = 101
ORDER BY during;
-- Range union
SELECT
room_id,
range_merge(during) AS merged_range,
array_agg(guest_name) AS guests
FROM reservations
GROUP BY room_id;
-- Int4range, Numrange, Date ranges
SELECT '[1, 10]'::int4range; -- Integer range
SELECT '[1.5, 9.5]'::numrange; -- Numeric range
SELECT '[2024-01-01, 2024-12-31]'::daterange; -- Date range
JSONB vs ARRAY vs HStore
| Feature | JSONB | ARRAY | HStore |
|---|---|---|---|
| Type Safety | No | Yes | No |
| Nested Data | Yes | Limited | No |
| Indexing | GIN | GiST | GIN |
| Query Language | Path | Operators | Keys |
| Schema Flexibility | High | Medium | High |
| Storage | Medium | Low | Low |
-- Compare performance
CREATE TABLE jsonb_table (data JSONB);
CREATE TABLE array_table (data TEXT[]);
CREATE TABLE hstore_table (data HSTORE);
-- Insert test data
INSERT INTO jsonb_table
SELECT jsonb_build_object('key', i)
FROM generate_series(1, 100000) i;
INSERT INTO array_table
SELECT ARRAY[i::text]
FROM generate_series(1, 100000) i;
INSERT INTO hstore_table
SELECT hstore('key', i::text)
FROM generate_series(1, 100000) i;
-- Compare query times
\timing on
SELECT * FROM jsonb_table WHERE data @> '{"key": 50000}';
SELECT * FROM array_table WHERE data @> ARRAY['50000'];
SELECT * FROM hstore_table WHERE data @> 'key => 50000';
Advanced Type Operations
-- Array aggregation
SELECT
department,
ARRAY_AGG(name ORDER BY salary DESC) AS employees_by_salary,
ARRAY_AGG(DISTINCT skill) AS unique_skills
FROM employee_skills
GROUP BY department;
-- JSONB to array
SELECT
product_name,
jsonb_array_elements_text(tags) AS tag
FROM product_tags;
-- Array to JSONB
SELECT
product_name,
to_jsonb(tags) AS tags_jsonb
FROM product_tags;
-- Nested array operations
CREATE TABLE matrix (
id SERIAL PRIMARY KEY,
data INT[][]
);
INSERT INTO matrix VALUES
(1, '{{1,2,3},{4,5,6},{7,8,9}}'::int[][]);
SELECT
data[1][1] AS element,
array_length(data, 1) AS rows,
array_length(data, 2) AS cols
FROM matrix;
Mathematical Properties
For array operations:
For range operations:
For JSONB containment:
βΉοΈ
Performance Tip: Use ARRAY for simple lists, JSONB for complex nested structures, and HStore for key-value pairs with known keys.
Type Conversion Patterns
-- Convert between types
SELECT
ARRAY[1, 2, 3]::TEXT[], -- Cast array
'{1,2,3}'::INT[], -- Parse text to array
'[1,2,3]'::JSONB, -- Text to JSONB
'[1,2,3]'::JSONB->0, -- Get first element
'{"a":1}'::HSTORE, -- Text to hstore
ROW(1, 'test')::RECORD; -- Create record
-- Safe conversion
CREATE OR REPLACE FUNCTION safe_cast(text, anyelement)
RETURNS anyelement AS $$
BEGIN
RETURN $1::text::anyelement;
EXCEPTION
WHEN OTHERS THEN
RETURN $2; -- Return default
END;
$$ LANGUAGE plpgsql;
SELECT safe_cast('123', 0); -- Returns 123
SELECT safe_cast('abc', 0); -- Returns 0
Type-Specific Indexing
-- GIN index for arrays
CREATE INDEX idx_tags_gin ON product_tags USING gin(tags);
-- GIN index for JSONB
CREATE INDEX idx_attributes_gin ON product_tags USING gin(attributes);
-- GiST index for ranges
CREATE INDEX idx_reservations_gist ON reservations USING gist(during);
-- B-tree index for specific array elements
CREATE INDEX idx_first_tag ON product_tags ((tags[1]));
-- Expression index for JSONB
CREATE INDEX idx_color ON product_tags ((attributes->>'color'));
β οΈ
Storage Overhead: Complex types increase storage. Monitor with pg_column_size() and consider normalization for frequently queried fields.