Graph RAG

Why Graph RAG?

Traditional RAG retrieves flat text chunks based on semantic similarity. Graph RAG leverages knowledge graphs to capture relationships between entities, enabling multi-hop reasoning and structured retrieval.

Knowledge Graph Construction

Entity and Relation Extraction

from dataclasses import dataclass, field

import spacy

@dataclass
class Entity:
    """A named entity that becomes a node in the knowledge graph."""
    name: str  # entity text; used as the dictionary key and as the node's ``name``
    entity_type: str  # category label; the builder fills it from spaCy's ``ent.label_``
    properties: dict  # free-form attributes; the builder stores character offsets here

@dataclass
class Relation:
    """A directed, typed edge between two entities in the knowledge graph.

    ``properties`` is optional and defaults to a fresh empty dict, so a
    relation can be built from a payload that only carries ``source``,
    ``target`` and ``relation_type``.
    """

    source: str  # name of the entity the edge starts from
    target: str  # name of the entity the edge points to
    relation_type: str  # edge label, e.g. "WORKS_AT"
    # default_factory gives every instance its own dict (no shared mutable default).
    properties: dict = field(default_factory=dict)

class KnowledgeGraphBuilder:
    """Build an in-memory knowledge graph from raw text.

    Entities come from spaCy named-entity recognition; relations between them
    are requested from an LLM and parsed from its JSON answer. Results are
    accumulated on the instance across calls: ``entities`` is keyed by entity
    text (a later mention overwrites an earlier one) and ``relations`` is a
    flat list.
    """

    # Caps that keep the relation-extraction prompt bounded. Override on a
    # subclass or instance to change them.
    MAX_PROMPT_ENTITIES = 20
    MAX_PROMPT_CHARS = 2000

    def __init__(self, llm):
        """Load the spaCy pipeline and start with an empty graph.

        Args:
            llm: Object exposing ``generate(prompt)`` that returns the model's
                text answer.
        """
        self.nlp = spacy.load("en_core_web_trf")
        self.llm = llm
        self.entities: dict[str, Entity] = {}
        self.relations: list[Relation] = []

    def extract_from_text(self, text: str) -> tuple[list[Entity], list[Relation]]:
        """Extract entities and relations from ``text`` and record them.

        Returns:
            The entities and relations found in this text only; the instance
            attributes hold the running totals.
        """
        # spaCy NER for initial entity detection
        doc = self.nlp(text)
        entities = [
            Entity(
                name=ent.text,
                entity_type=ent.label_,
                properties={"start": ent.start_char, "end": ent.end_char},
            )
            for ent in doc.ents
        ]
        for entity in entities:
            self.entities[entity.name] = entity

        # LLM for relation extraction
        relations = self._extract_relations(text, entities)
        self.relations.extend(relations)

        return entities, relations

    def _extract_relations(self, text: str, entities: list[Entity]) -> list[Relation]:
        """Ask the LLM for relations among ``entities`` as found in ``text``."""
        if not entities:
            # Nothing to relate: skip the LLM call instead of prompting with an
            # empty entity list.
            return []

        # NOTE: the entity list and the text are truncated independently, so an
        # entity found past the text cut-off is listed without its context.
        entity_names = [e.name for e in entities[:self.MAX_PROMPT_ENTITIES]]
        prompt = f"""Extract relationships between these entities from the text.
        Return as JSON list with source, target, and relation_type.

        Entities: {', '.join(entity_names)}
        Text: {text[:self.MAX_PROMPT_CHARS]}

        Relationships:"""

        response = self.llm.generate(prompt)
        return self._parse_relations(response)

    def _parse_relations(self, response: str) -> list[Relation]:
        """Parse the LLM answer into ``Relation`` objects (best effort).

        Unparseable answers yield ``[]``. Individual items that are not
        objects, or that lack a string ``source``/``target``/``relation_type``,
        are skipped without discarding the rest. A missing or non-dict
        ``properties`` becomes ``{}``; other extra keys are ignored.
        """
        import json

        if not isinstance(response, str):
            return []

        # Models often wrap the JSON in prose or a Markdown code fence, so
        # parse only the outermost [...] span when there is one.
        start, end = response.find("["), response.rfind("]")
        payload = response[start:end + 1] if 0 <= start < end else response
        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            return []
        if not isinstance(data, list):
            return []

        required = ("source", "target", "relation_type")
        relations = []
        for item in data:
            if not isinstance(item, dict):
                continue
            if not all(isinstance(item.get(key), str) for key in required):
                continue
            properties = item.get("properties")
            relations.append(
                Relation(
                    source=item["source"],
                    target=item["target"],
                    relation_type=item["relation_type"],
                    properties=properties if isinstance(properties, dict) else {},
                )
            )
        return relations

Graph Storage with Neo4j

from neo4j import GraphDatabase

class GraphStore:
    """Thin wrapper around a Neo4j driver for writing and querying the graph.

    Values are always sent as query parameters. Node labels, relationship
    types and the traversal depth cannot be parameterised in Cypher, so they
    are escaped or validated before being placed in the query text.
    """

    def __init__(self, uri: str, user: str, password: str):
        """Open a driver for the database at ``uri`` with basic credentials."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return ``name`` as a backtick-quoted Cypher identifier.

        Raises:
            ValueError: If ``name`` is not a non-blank string.
        """
        if not isinstance(name, str) or not name.strip():
            raise ValueError(f"invalid Cypher identifier: {name!r}")
        # Fold the unicode-escaped backtick (\u0060) into a literal one first,
        # then double every backtick so the name cannot close the quoting.
        escaped = name.replace("\\u0060", "`").replace("`", "``")
        return f"`{escaped}`"

    def create_entity(self, entity: Entity):
        """Create or update the node for ``entity``, matched on label and name."""
        label = self._quote_identifier(entity.entity_type)
        query = f"""
        MERGE (e:{label} {{name: $name}})
        SET e += $properties
        """
        parameters = {"name": entity.name, "properties": entity.properties or {}}
        with self.driver.session() as session:
            session.run(query, parameters)

    def create_relation(self, relation: Relation):
        """Create or update the edge for ``relation`` between two existing nodes.

        Nodes are matched by ``name`` only; if either endpoint does not exist
        the query matches nothing and no edge is written.
        """
        rel_type = self._quote_identifier(relation.relation_type)
        query = f"""
        MATCH (a {{name: $source}})
        MATCH (b {{name: $target}})
        MERGE (a)-[r:{rel_type}]->(b)
        SET r += $properties
        """
        parameters = {
            "source": relation.source,
            "target": relation.target,
            "properties": relation.properties or {},
        }
        with self.driver.session() as session:
            session.run(query, parameters)

    def query_graph(self, cypher_query: str, parameters: "dict | None" = None) -> list[dict]:
        """Run ``cypher_query`` and return every record as a plain dict.

        Args:
            cypher_query: Cypher text to execute.
            parameters: Optional values for the ``$name`` placeholders in the
                query.
        """
        with self.driver.session() as session:
            result = session.run(cypher_query, parameters)
            return [dict(record) for record in result]

    def get_entity_neighbors(self, entity_name: str, depth: int = 2) -> list[dict]:
        """Return up to 50 paths of length 1..``depth`` starting at ``entity_name``.

        Each row holds ``path`` and its length as ``distance``.

        Raises:
            ValueError: If ``depth`` is not an integer value of at least 1.
        """
        # The hop bound is part of the query text, so force it to a plain int.
        depth = int(depth)
        if depth < 1:
            raise ValueError("depth must be >= 1")
        query = f"""
        MATCH path = (start {{name: $name}})-[*1..{depth}]-(neighbor)
        RETURN path, length(path) as distance
        LIMIT 50
        """
        # $name must be bound here; without it the query fails on the server.
        return self.query_graph(query, {"name": entity_name})

Graph-Based Retrieval

Multi-Hop Reasoning

class GraphRetriever:
    """Hybrid retriever that combines graph traversal with vector search."""

    def __init__(self, graph_store, vector_store, llm):
        """
        Args:
            graph_store: Object exposing ``get_entity_neighbors(name, depth=...)``.
            vector_store: Object exposing ``search(query, top_k=...)``.
            llm: Object exposing ``generate(prompt)`` that returns text.
        """
        self.graph = graph_store
        self.vector = vector_store
        self.llm = llm

    def retrieve(self, query: str, hops: int = 2, top_k: int = 5) -> dict:
        """Collect graph and vector context for ``query``.

        Args:
            query: The user's question.
            hops: Maximum traversal depth around each query entity.
            top_k: Number of results requested from the vector store.

        Returns:
            A dict with ``graph_context`` (neighbour rows for every query
            entity, in entity order), ``vector_context`` (the vector store's
            results) and ``entities`` (the names extracted from the query).
        """
        # Step 1: Extract entities from query
        entities = self._extract_query_entities(query)

        # Step 2: Graph traversal for structured context
        graph_context = []
        for entity in entities:
            graph_context.extend(self.graph.get_entity_neighbors(entity, depth=hops))

        # Step 3: Vector search for semantic context
        vector_results = self.vector.search(query, top_k=top_k)

        # Step 4: Combine graph and vector results
        return {
            "graph_context": graph_context,
            "vector_context": vector_results,
            "entities": entities
        }

    def _extract_query_entities(self, query: str) -> list[str]:
        """Ask the LLM for the entities in ``query``.

        The comma-separated answer is split, stripped and de-duplicated while
        keeping first-seen order, so a repeated name is traversed only once.
        """
        prompt = f"""Extract the key entities (people, organizations, concepts)
        from this question. Return as comma-separated list.

        Question: {query}
        Entities:"""

        response = self.llm.generate(prompt)
        names = (part.strip() for part in response.split(','))
        # dict.fromkeys drops repeats and preserves insertion order.
        return list(dict.fromkeys(name for name in names if name))

Graph Community Detection

import networkx as nx
from community import community_louvain

class GraphCommunityDetector:
    """Group graph entities into communities and summarise each one."""

    def __init__(self, graph_store):
        """
        Args:
            graph_store: Object exposing ``query_graph(cypher)`` that returns a
                list of dict rows.
        """
        self.graph_store = graph_store

    def _build_nx_graph(self) -> "nx.Graph":
        """Load every relationship from the store into an undirected graph.

        Nodes are keyed by their ``name`` property. Edge direction is dropped
        because Louvain works on undirected graphs. Nodes without a name, and
        nodes with no relationships at all, are not included.
        """
        rows = self.graph_store.query_graph(
            "MATCH (a)-[r]->(b) "
            "RETURN a.name AS source, b.name AS target, type(r) AS relation_type"
        )
        graph = nx.Graph()
        for row in rows:
            source, target = row.get("source"), row.get("target")
            if source is None or target is None:
                continue
            graph.add_edge(source, target, relation_type=row.get("relation_type"))
        return graph

    def detect_communities(self) -> dict[int, list[str]]:
        """Run Louvain community detection over the stored graph.

        Returns:
            A mapping from community id to the names of its member entities.
        """
        # Build NetworkX graph from Neo4j
        nx_graph = self._build_nx_graph()

        # Louvain community detection
        partition = community_louvain.best_partition(nx_graph)

        # Group entities by community
        communities = {}
        for node, comm_id in partition.items():
            communities.setdefault(comm_id, []).append(node)

        return communities

    def summarize_communities(self, communities: dict, llm) -> list[dict]:
        """Ask ``llm`` for a summary of every community.

        Args:
            communities: Mapping from community id to its member names, as
                returned by ``detect_communities``.
            llm: Object exposing ``generate(prompt)`` that returns text.

        Returns:
            One dict per community with ``community_id``, ``members`` (the
            full list) and ``summary``. Only the first 20 members are shown to
            the model.
        """
        summaries = []
        for comm_id, members in communities.items():
            # str() keeps the join from failing on non-string node keys.
            listed = ', '.join(str(member) for member in members[:20])
            prompt = f"""Summarize the key themes and relationships among these entities:

            Entities: {listed}

            Summary:"""

            summaries.append({
                "community_id": comm_id,
                "members": members,
                "summary": llm.generate(prompt)
            })
        return summaries

Graph RAG Patterns

Pattern	Description	Best For
Entity-centric	Retrieve by entity relationships	Factual Q&A
Community-based	Pre-summarized graph communities	Large-scale KGs
Hybrid Graph+Vector	Combine graph traversal with semantic search	Complex reasoning
Temporal Graph	Time-aware knowledge graphs	Event-based queries

Graph RAG vs Traditional RAG

Aspect	Traditional RAG	Graph RAG
Retrieval	Flat text chunks	Entity relationships
Multi-hop	Limited	Native support
Structure	Unstructured	Semi-structured
Reasoning	Semantic similarity	Graph traversal
Complexity	Low	High
Best for	General Q&A	Structured knowledge

Graph RAG excels when queries require reasoning across multiple connected entities or when the knowledge base has rich relational structure.