πŸŽ‰ 75% of content is free forever β€” Unlock Premium from $10/mo β†’
CW
Search courses…
πŸ’Ό Servicesℹ️ Aboutβœ‰οΈ ContactView Pricing Plansfrom $10

Graph Queries in SQL: MATCH, Path Functions, Shortest Path

Advanced SQLGraph Queries⭐ Premium

Advertisement

Interview Question: "How would you find the shortest path between two nodes in SQL? Explain the MATCH clause and path functions. Compare recursive CTEs vs MATCH for graph traversal." β€” Asked at LinkedIn, Twitter, Meta for Graph Database roles

ℹ️

Difficulty: Advanced | Companies: LinkedIn, Twitter, Meta, Pinterest, Amazon Neptune | Time: 60-75 minutes

Graph Data Model in SQL

-- Create graph tables (node and edge tables)
CREATE TABLE graph_nodes (
    node_id INT PRIMARY KEY,
    node_type VARCHAR(50),
    label VARCHAR(100),
    properties JSONB
);

CREATE TABLE graph_edges (
    edge_id SERIAL PRIMARY KEY,
    source_node INT REFERENCES graph_nodes(node_id),
    target_node INT REFERENCES graph_nodes(node_id),
    edge_type VARCHAR(50),
    weight DECIMAL(10,2),
    properties JSONB
);

-- Insert social network data
INSERT INTO graph_nodes VALUES
(1, 'person', 'Alice', '{"age": 30, "city": "SF"}'),
(2, 'person', 'Bob', '{"age": 25, "city": "NYC"}'),
(3, 'person', 'Charlie', '{"age": 35, "city": "SF"}'),
(4, 'person', 'Diana', '{"age": 28, "city": "LA"}'),
(5, 'person', 'Eve', '{"age": 32, "city": "NYC"}'),
(6, 'company', 'TechCorp', '{"industry": "tech"}'),
(7, 'company', 'FinanceInc', '{"industry": "finance"}'),
(8, 'skill', 'SQL', '{"level": "advanced"}'),
(9, 'skill', 'Python', '{"level": "intermediate"}'),
(10, 'skill', 'Java', '{"level": "beginner"}');

INSERT INTO graph_edges (source_node, target_node, edge_type, weight, properties) VALUES
(1, 2, 'knows', 0.8, '{"since": "2020"}'),
(1, 3, 'knows', 0.9, '{"since": "2019"}'),
(2, 4, 'knows', 0.7, '{"since": "2021"}'),
(3, 5, 'knows', 0.85, '{"since": "2020"}'),
(4, 5, 'knows', 0.6, '{"since": "2022"}'),
(1, 6, 'works_at', NULL, '{"role": "engineer"}'),
(2, 7, 'works_at', NULL, '{"role": "analyst"}'),
(3, 6, 'works_at', NULL, '{"role": "manager"}'),
(1, 8, 'has_skill', 0.9, NULL),
(1, 9, 'has_skill', 0.7, NULL),
(2, 8, 'has_skill', 0.8, NULL),
(3, 9, 'has_skill', 0.85, NULL),
(4, 10, 'has_skill', 0.6, NULL),
(5, 8, 'has_skill', 0.75, NULL),
(5, 9, 'has_skill', 0.8, NULL);

Recursive CTE Graph Traversal

-- Find all paths from Alice to Eve
WITH RECURSIVE graph_paths AS (
    -- Anchor: Start from Alice
    SELECT 
        node_id,
        label,
        1 AS depth,
        ARRAY[node_id] AS path,
        label::TEXT AS path_text,
        0.0 AS total_weight
    FROM graph_nodes
    WHERE label = 'Alice'
    
    UNION ALL
    
    -- Recursive: Traverse edges
    SELECT 
        n.node_id,
        n.label,
        p.depth + 1,
        p.path || n.node_id,
        p.path_text || ' β†’ ' || n.label,
        p.total_weight + COALESCE(e.weight, 0)
    FROM graph_paths p
    INNER JOIN graph_edges e ON p.node_id = e.source_node
    INNER JOIN graph_nodes n ON e.target_node = n.node_id
    WHERE n.node_id != ALL(p.path)  -- No cycles
        AND p.depth < 10  -- Max depth
)
SELECT 
    path_text,
    depth,
    total_weight,
    path
FROM graph_paths
WHERE label = 'Eve'
ORDER BY depth, total_weight;

Output:

path_textdepthtotal_weightpath
Alice β†’ Bob β†’ Diana β†’ Eve42.10{1,2,4,5}
Alice β†’ Charlie β†’ Eve31.75{1,3,5}
Alice β†’ Bob β†’ Diana β†’ Eve42.10{1,2,4,5}

Shortest Path with Dijkstra

-- Dijkstra's algorithm using recursive CTE
WITH RECURSIVE dijkstra AS (
    -- Initialize: source node has distance 0
    SELECT 
        node_id,
        label,
        0 AS distance,
        ARRAY[node_id] AS path,
        label::TEXT AS path_text
    FROM graph_nodes
    WHERE label = 'Alice'
    
    UNION ALL
    
    -- Relax edges
    SELECT 
        n.node_id,
        n.label,
        d.distance + COALESCE(e.weight, 1),
        d.path || n.node_id,
        d.path_text || ' β†’ ' || n.label
    FROM dijkstra d
    INNER JOIN graph_edges e ON d.node_id = e.source_node
    INNER JOIN graph_nodes n ON e.target_node = n.node_id
    WHERE n.node_id != ALL(d.path)
        AND d.distance + COALESCE(e.weight, 1) < (
            SELECT MIN(distance) FROM dijkstra WHERE node_id = n.node_id
        )
)
SELECT DISTINCT ON (node_id)
    node_id,
    label,
    distance,
    path_text
FROM dijkstra
WHERE label = 'Eve'
ORDER BY node_id, distance;

Bidirectional Search

-- Bidirectional BFS from both source and target
WITH RECURSIVE 
forward_search AS (
    SELECT 
        node_id,
        1 AS depth,
        ARRAY[node_id] AS path
    FROM graph_nodes WHERE label = 'Alice'
    
    UNION ALL
    
    SELECT 
        n.node_id,
        f.depth + 1,
        f.path || n.node_id
    FROM forward_search f
    JOIN graph_edges e ON f.node_id = e.source_node
    JOIN graph_nodes n ON e.target_node = n.node_id
    WHERE n.node_id != ALL(f.path) AND f.depth < 5
),
backward_search AS (
    SELECT 
        node_id,
        1 AS depth,
        ARRAY[node_id] AS path
    FROM graph_nodes WHERE label = 'Eve'
    
    UNION ALL
    
    SELECT 
        n.node_id,
        b.depth + 1,
        b.path || n.node_id
    FROM backward_search b
    JOIN graph_edges e ON b.node_id = e.target_node
    JOIN graph_nodes n ON e.source_node = n.node_id
    WHERE n.node_id != ALL(b.path) AND b.depth < 5
)
SELECT 
    f.path || b.path[2:] AS full_path,
    f.depth + b.depth - 1 AS total_depth
FROM forward_search f
JOIN backward_search b ON f.node_id = b.node_id
WHERE f.node_id NOT IN (SELECT node_id FROM graph_nodes WHERE label IN ('Alice', 'Eve'))
ORDER BY total_depth
LIMIT 5;

Graph Centrality Measures

-- Degree centrality
SELECT 
    n.node_id,
    n.label,
    COUNT(DISTINCT e1.edge_id) AS out_degree,
    COUNT(DISTINCT e2.edge_id) AS in_degree,
    COUNT(DISTINCT e1.edge_id) + COUNT(DISTINCT e2.edge_id) AS total_degree
FROM graph_nodes n
LEFT JOIN graph_edges e1 ON n.node_id = e1.source_node
LEFT JOIN graph_edges e2 ON n.node_id = e2.target_node
GROUP BY n.node_id, n.label
ORDER BY total_degree DESC;

-- Betweenness centrality (simplified)
WITH all_shortest_paths AS (
    SELECT 
        source_node,
        target_node,
        path,
        path_text
    FROM graph_paths
    WHERE source_node != target_node
)
SELECT 
    n.node_id,
    n.label,
    COUNT(*) FILTER (WHERE n.node_id = ANY(
        SELECT unnest(path[2:array_length(path, 1)-1])
    )) AS betweenness_count
FROM graph_nodes n
CROSS JOIN all_shortest_paths p
GROUP BY n.node_id, n.label
ORDER BY betweenness_count DESC;

MATCH Clause (ISO SQL/PGQ)

-- PostgreSQL 14+ graph query syntax (experimental)
-- CREATE PROPERTY GRAPH for graph queries

-- Simulated MATCH with recursive CTE
WITH RECURSIVE match_query AS (
    -- Pattern: (a:person)-[:knows]->(b:person)
    SELECT 
        a.node_id AS a_id,
        a.label AS a_name,
        b.node_id AS b_id,
        b.label AS b_name,
        e.weight
    FROM graph_nodes a
    JOIN graph_edges e ON a.node_id = e.source_node
    JOIN graph_nodes b ON e.target_node = b.node_id
    WHERE a.node_type = 'person'
        AND b.node_type = 'person'
        AND e.edge_type = 'knows'
)
SELECT * FROM match_query;

Graph Pattern Matching

-- Find triangles (3-clique)
SELECT 
    a.label AS person_a,
    b.label AS person_b,
    c.label AS person_c
FROM graph_nodes a
JOIN graph_edges e1 ON a.node_id = e1.source_node
JOIN graph_nodes b ON e1.target_node = b.node_id
JOIN graph_edges e2 ON b.node_id = e2.source_node
JOIN graph_nodes c ON e2.target_node = c.node_id
JOIN graph_edges e3 ON c.node_id = e3.source_node
WHERE e3.target_node = a.node_id
    AND a.node_id < b.node_id
    AND b.node_id < c.node_id;

-- Find 2-hop neighbors
WITH RECURSIVE two_hop AS (
    SELECT 
        n1.node_id AS source,
        n2.node_id AS neighbor,
        1 AS hops
    FROM graph_nodes n1
    JOIN graph_edges e ON n1.node_id = e.source_node
    JOIN graph_nodes n2 ON e.target_node = n2.node_id
    
    UNION
    
    SELECT 
        t.source,
        n3.node_id,
        2
    FROM two_hop t
    JOIN graph_edges e ON t.neighbor = e.source_node
    JOIN graph_nodes n3 ON e.target_node = n3.node_id
    WHERE n3.node_id != t.source
)
SELECT DISTINCT
    s.label AS source,
    n.label AS two_hop_neighbor
FROM two_hop t
JOIN graph_nodes s ON t.source = s.node_id
JOIN graph_nodes n ON t.neighbor = n.node_id
WHERE t.hops = 2
ORDER BY s.label, n.label;

Graph Aggregation

-- Community detection (label propagation simplified)
WITH RECURSIVE communities AS (
    SELECT 
        node_id,
        node_id AS community_id,
        1 AS iteration
    FROM graph_nodes
    
    UNION ALL
    
    SELECT 
        c.node_id,
        mode() WITHIN GROUP (ORDER BY c2.community_id),
        c.iteration + 1
    FROM communities c
    JOIN graph_edges e ON c.node_id = e.source_node
    JOIN communities c2 ON e.target_node = c2.node_id
    WHERE c.iteration < 10
    GROUP BY c.node_id, c.iteration
    HAVING mode() WITHIN GROUP (ORDER BY c2.community_id) != c.community_id
)
SELECT DISTINCT
    n.label,
    c.community_id
FROM communities c
JOIN graph_nodes n ON c.node_id = n.node_id
WHERE c.iteration = (SELECT MAX(iteration) FROM communities)
ORDER BY c.community_id, n.label;

Mathematical Graph Theory

Graph G=(V,E)G = (V, E) where:

  • ∣V∣=n|V| = n nodes
  • ∣E∣=m|E| = m edges

Adjacency matrix AA:

Aij={1if (i,j)∈E0otherwiseA_{ij} = \begin{cases} 1 & \text{if } (i,j) \in E \\ 0 & \text{otherwise} \end{cases}

Degree centrality:

CD(v)=deg⁑(v)nβˆ’1C_D(v) = \frac{\deg(v)}{n-1}

Betweenness centrality:

CB(v)=βˆ‘sβ‰ vβ‰ tΟƒst(v)ΟƒstC_B(v) = \sum_{s \neq v \neq t} \frac{\sigma_{st}(v)}{\sigma_{st}}

ℹ️

Graph Algorithms: For production graph workloads, consider dedicated graph databases like Neo4j, Amazon Neptune, or Apache AGE extension for PostgreSQL.

Performance Considerations

OperationTime ComplexitySpace Complexity
BFSO(V+E)O(V + E)O(V)O(V)
DFSO(V+E)O(V + E)O(V)O(V)
DijkstraO((V+E)log⁑V)O((V + E) \log V)O(V)O(V)
BetweennessO(Vβ‹…E)O(V \cdot E)O(V+E)O(V + E)

⚠️

SQL Limitations: SQL graph queries are less efficient than native graph databases for deep traversals. Use recursive CTEs for shallow graphs (< 10 levels).

Advertisement