Why Graph RAG?
Traditional RAG retrieves flat text chunks based on semantic similarity. Graph RAG leverages knowledge graphs to capture relationships between entities, enabling multi-hop reasoning and structured retrieval.
Knowledge Graph Construction
Entity and Relation Extraction
from dataclasses import dataclass, field

import spacy
@dataclass
class Entity:
    """A named entity that becomes a node in the knowledge graph.

    ``properties`` is optional: when omitted, every instance receives its own
    empty dict, so instances never share mutable state.
    """

    name: str  # surface text of the entity
    entity_type: str  # category label for the entity (e.g. an NER label)
    properties: dict = field(default_factory=dict)  # free-form metadata
@dataclass
class Relation:
    """A directed, typed edge between two entities identified by name.

    ``properties`` defaults to a fresh empty dict so a relation can be built
    from just ``source``, ``target`` and ``relation_type``.
    """

    source: str  # name of the entity the edge starts from
    target: str  # name of the entity the edge points to
    relation_type: str  # label describing the relationship
    properties: dict = field(default_factory=dict)  # free-form edge metadata
class KnowledgeGraphBuilder:
def __init__(self, llm):
self.nlp = spacy.load("en_core_web_trf")
self.llm = llm
self.entities: dict[str, Entity] = {}
self.relations: list[Relation] = []
def extract_from_text(self, text: str) -> tuple[list[Entity], list[Relation]]:
# SpaCy NER for initial entity detection
doc = self.nlp(text)
entities = []
for ent in doc.ents:
entity = Entity(
name=ent.text,
entity_type=ent.label_,
properties={"start": ent.start_char, "end": ent.end_char}
)
entities.append(entity)
self.entities[ent.text] = entity
# LLM for relation extraction
relations = self._extract_relations(text, entities)
self.relations.extend(relations)
return entities, relations
def _extract_relations(self, text: str, entities: list[Entity]) -> list[Relation]:
entity_names = [e.name for e in entities[:20]]
prompt = f"""Extract relationships between these entities from the text.
Return as JSON list with source, target, and relation_type.
Entities: {', '.join(entity_names)}
Text: {text[:2000]}
Relationships:"""
response = self.llm.generate(prompt)
return self._parse_relations(response)
def _parse_relations(self, response: str) -> list[Relation]:
import json
try:
data = json.loads(response)
return [Relation(**r) for r in data]
except:
return []
Graph Storage with Neo4j
from neo4j import GraphDatabase
class GraphStore:
    """Small wrapper around a Neo4j driver for storing and querying a graph.

    Usable as a context manager; the driver is closed on exit.
    """

    def __init__(self, uri: str, user: str, password: str):
        """Open a driver for ``uri`` using basic ``(user, password)`` auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    @staticmethod
    def _quote_identifier(identifier: str) -> str:
        """Return ``identifier`` backtick-quoted for use as a label or type.

        Labels and relationship types cannot be passed as query parameters,
        so they are quoted (embedded backticks doubled) before being placed
        in the query text.

        Raises:
            ValueError: If ``identifier`` is empty.
        """
        text = str(identifier)
        if not text:
            raise ValueError("Cypher identifier must not be empty")
        return "`" + text.replace("`", "``") + "`"

    def create_entity(self, entity: Entity):
        """Merge a node labelled ``entity.entity_type`` and set its properties."""
        label = self._quote_identifier(entity.entity_type)
        query = (
            f"MERGE (e:{label} {{name: $name}})\n"
            "SET e += $properties"
        )
        with self.driver.session() as session:
            session.run(
                query,
                {"name": entity.name, "properties": entity.properties},
            )

    def create_relation(self, relation: Relation):
        """Merge a typed edge between the nodes named by ``relation``.

        Both endpoint nodes must already exist; otherwise the MATCH clauses
        find nothing and no edge is created.
        """
        rel_type = self._quote_identifier(relation.relation_type)
        query = (
            "MATCH (a {name: $source})\n"
            "MATCH (b {name: $target})\n"
            f"MERGE (a)-[r:{rel_type}]->(b)\n"
            "SET r += $properties"
        )
        with self.driver.session() as session:
            session.run(
                query,
                {
                    "source": relation.source,
                    "target": relation.target,
                    "properties": relation.properties,
                },
            )

    def query_graph(self, cypher_query: str, parameters: dict | None = None) -> list[dict]:
        """Run ``cypher_query`` and return each record as a dict.

        Args:
            cypher_query: Cypher text to execute.
            parameters: Optional values for ``$name``-style placeholders.
        """
        with self.driver.session() as session:
            result = session.run(cypher_query, parameters)
            # Materialize inside the session; results are consumed lazily.
            return [dict(record) for record in result]

    def get_entity_neighbors(self, entity_name: str, depth: int = 2) -> list[dict]:
        """Return up to 50 paths of length 1..``depth`` from the named node.

        Each row holds ``path`` and ``distance`` (the path length).
        """
        # Variable-length bounds cannot be parameters; coerce to int so only
        # a number ever reaches the query text.
        max_hops = int(depth)
        if max_hops < 1:
            return []
        query = (
            f"MATCH path = (start {{name: $name}})-[*1..{max_hops}]-(neighbor)\n"
            "RETURN path, length(path) as distance\n"
            "LIMIT 50"
        )
        return self.query_graph(query, {"name": entity_name})
Graph-Based Retrieval
Multi-Hop Reasoning
class GraphRetriever:
    """Retrieve context by combining graph traversal with vector search."""

    def __init__(self, graph_store, vector_store, llm):
        """Store the collaborators used by ``retrieve``.

        Args:
            graph_store: Object exposing ``get_entity_neighbors(name, depth=...)``.
            vector_store: Object exposing ``search(query, top_k=...)``.
            llm: Object exposing ``generate(prompt) -> str``.
        """
        self.graph = graph_store
        self.vector = vector_store
        self.llm = llm

    def retrieve(self, query: str, hops: int = 2, top_k: int = 5) -> dict:
        """Gather graph and vector context for ``query``.

        Args:
            query: The user question.
            hops: Traversal depth passed to the graph store.
            top_k: Number of results requested from the vector store.

        Returns:
            A dict with keys ``graph_context``, ``vector_context`` and
            ``entities``.
        """
        # Step 1: extract entities from the query.
        entities = self._extract_query_entities(query)

        # Step 2: graph traversal for structured context.
        graph_context = []
        for entity in entities:
            graph_context.extend(
                self.graph.get_entity_neighbors(entity, depth=hops)
            )

        # Step 3: vector search for semantic context.
        vector_results = self.vector.search(query, top_k=top_k)

        # Step 4: combine graph and vector results.
        return {
            "graph_context": graph_context,
            "vector_context": vector_results,
            "entities": entities,
        }

    def _extract_query_entities(self, query: str) -> list[str]:
        """Ask the LLM for the entities in ``query``.

        Returns the names in first-seen order with blanks and duplicates
        removed, so each entity is traversed only once.
        """
        prompt = (
            "Extract the key entities (people, organizations, concepts)\n"
            "from this question. Return as comma-separated list.\n"
            f"Question: {query}\n"
            "Entities:"
        )
        response = self.llm.generate(prompt)
        names = (part.strip() for part in response.split(","))
        # dict.fromkeys de-duplicates while keeping insertion order.
        return list(dict.fromkeys(name for name in names if name))
Graph Community Detection
import networkx as nx
from community import community_louvain
class GraphCommunityDetector:
    """Detect and summarize communities of entities in the stored graph."""

    def __init__(self, graph_store):
        """Keep the store whose ``query_graph`` method supplies the graph."""
        self.graph_store = graph_store

    def detect_communities(self) -> dict[int, list[str]]:
        """Partition the graph with Louvain and group nodes by community.

        Returns:
            A mapping of community id to the node names in that community.
        """
        # Build NetworkX graph from Neo4j.
        nx_graph = self._build_nx_graph()

        # Louvain community detection.
        partition = community_louvain.best_partition(nx_graph)

        # Group entities by community.
        communities = {}
        for node, comm_id in partition.items():
            communities.setdefault(comm_id, []).append(node)
        return communities

    def _build_nx_graph(self):
        """Load named nodes and their edges from the store into ``nx.Graph``.

        An undirected graph is built because edge direction is not used when
        grouping nodes into communities here.
        """
        graph = nx.Graph()

        # Add nodes first so entities without any relation are still included.
        node_rows = self.graph_store.query_graph(
            "MATCH (n) WHERE n.name IS NOT NULL RETURN n.name AS name"
        )
        for row in node_rows:
            graph.add_node(row["name"])

        edge_rows = self.graph_store.query_graph(
            "MATCH (a)-[r]->(b) "
            "WHERE a.name IS NOT NULL AND b.name IS NOT NULL "
            "RETURN a.name AS source, b.name AS target, type(r) AS relation_type"
        )
        for row in edge_rows:
            graph.add_edge(
                row["source"],
                row["target"],
                relation_type=row["relation_type"],
            )
        return graph

    def summarize_communities(self, communities: dict, llm) -> list[dict]:
        """Ask ``llm`` for a summary of each community.

        Args:
            communities: Mapping of community id to member names.
            llm: Object exposing ``generate(prompt) -> str``.

        Returns:
            One dict per community with keys ``community_id``, ``members``
            and ``summary``.
        """
        summaries = []
        for comm_id, members in communities.items():
            # Only the first 20 members go in the prompt to bound its size.
            listed = ", ".join(str(member) for member in members[:20])
            prompt = (
                "Summarize the key themes and relationships among these entities:\n"
                f"Entities: {listed}\n"
                "Summary:"
            )
            summaries.append({
                "community_id": comm_id,
                "members": members,
                "summary": llm.generate(prompt),
            })
        return summaries
Graph RAG Patterns
| Pattern | Description | Best For |
|---|---|---|
| Entity-centric | Retrieve by entity relationships | Factual Q&A |
| Community-based | Pre-summarized graph communities | Large-scale KGs |
| Hybrid Graph+Vector | Combine graph traversal with semantic search | Complex reasoning |
| Temporal Graph | Time-aware knowledge graphs | Event-based queries |
Graph RAG vs Traditional RAG
| Aspect | Traditional RAG | Graph RAG |
|---|---|---|
| Retrieval | Flat text chunks | Entity relationships |
| Multi-hop | Limited | Native support |
| Structure | Unstructured | Semi-structured |
| Reasoning | Semantic similarity | Graph traversal |
| Complexity | Low | High |
| Best for | General Q&A | Structured knowledge |
Graph RAG excels when queries require reasoning across multiple connected entities or when the knowledge base has rich relational structure.