🎉 75% of content is free forever — Unlock Premium from $10/mo →
CW
Search courses…
💼 Services · ℹ️ About · ✉️ Contact · View Pricing Plans from $10

Named Entity Linking

Information Extraction › Entity Linking and Knowledge Base Integration · 🟢 Free Lesson

Advertisement

Named Entity Linking

Named Entity Linking (NEL) identifies entities in text and links them to corresponding entries in a knowledge base. It combines entity recognition, disambiguation, and knowledge base lookup.

Entity Linking Stages

| Stage | Input | Output | Example |
|---|---|---|---|
| NER | Text | Entity spans | "Apple" → ORG |
| Candidate Generation | Mention | Candidate entities | "Apple" → [Apple Inc., Apple Records] |
| Disambiguation | Candidates | Best entity | "Apple" → Apple Inc. (Q312) |
| Linking | Entity | KB ID | Apple Inc. → Q312 |

Named Entity Recognition (NER)

from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

class NERExtractor:
    """Run a token-classification model and return entity spans found in text.

    The tokenizer must be a "fast" tokenizer, because character offsets are
    requested from it (``return_offsets_mapping=True``). Sub-word merging
    relies on the WordPiece ``##`` continuation prefix used by BERT-style
    vocabularies such as the default model's.
    """

    def __init__(self, model_name="dslim/bert-base-NER"):
        """Load the tokenizer and model identified by ``model_name``."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForTokenClassification.from_pretrained(model_name)
        # Maps predicted class index -> BIO label string (e.g. "B-ORG").
        self.label_map = self.model.config.id2label

    def extract_entities(self, text):
        """Extract named entities from ``text``.

        Args:
            text: The input string. Input longer than the model's maximum
                length is truncated by the tokenizer.

        Returns:
            A list of dicts, in order of appearance, each with:
            ``text`` (the exact substring of the input), ``type`` (label
            without its ``B-``/``I-`` prefix), and ``start``/``end``
            (character offsets into ``text``, end exclusive).
        """
        encoded = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            return_offsets_mapping=True,
        )
        # Offsets are only for span reconstruction; they must not be passed
        # on to the model's forward call.
        offsets = encoded.pop("offset_mapping")[0].tolist()

        with torch.no_grad():
            outputs = self.model(**encoded)
            predictions = torch.argmax(outputs.logits, dim=-1)

        tokens = self.tokenizer.convert_ids_to_tokens(encoded["input_ids"][0])
        labels = [self.label_map[p.item()] for p in predictions[0]]

        spans = []
        current = None

        for token, label, (start, end) in zip(tokens, labels, offsets):
            if start == end:
                # Zero-width offsets mark special tokens ([CLS], [SEP], ...);
                # they are not part of the text and never belong to an entity.
                continue

            if current is not None and token.startswith("##"):
                # A word-piece continuing a word that is already inside the
                # open entity: keep the whole word together regardless of the
                # label predicted for this individual piece.
                current["end"] = end
                continue

            if label.startswith("B-"):
                if current is not None:
                    spans.append(current)
                current = {"type": label[2:], "start": start, "end": end}
            elif label.startswith("I-") and current is not None:
                current["end"] = end
            elif current is not None:
                spans.append(current)
                current = None

        if current is not None:
            spans.append(current)

        # Slice the surface form from the original text so casing, spacing
        # and punctuation are exactly what the caller passed in.
        return [
            {
                "text": text[span["start"]:span["end"]],
                "type": span["type"],
                "start": span["start"],
                "end": span["end"],
            }
            for span in spans
        ]

# Demo: run the extractor on one sentence and print each entity with its type.
ner = NERExtractor()
sample_sentence = "Apple was founded by Steve Jobs in California."
entities = ner.extract_entities(sample_sentence)
for e in entities:
    print(f"{e['text']} [{e['type']}]")
# Expected output:
# Apple [ORG]
# Steve Jobs [PER]
# California [LOC]

Entity Disambiguation

from transformers import AutoModel, AutoTokenizer
import torch

class EntityDisambiguator:
    """Pick the best knowledge-base candidate for a mention by embedding similarity.

    Candidates are registered with :meth:`add_candidate`; each is embedded
    once and stored in memory. :meth:`disambiguate` embeds the mention with
    its context and returns the candidate with the highest cosine similarity.
    """

    def __init__(self):
        """Load the sentence-embedding tokenizer/model and create empty stores."""
        model_name = "sentence-transformers/all-MiniLM-L6-v2"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        # entity_id -> 1-D embedding tensor
        self.candidate_embeddings = {}
        # entity_id -> {"name": ..., "description": ...}
        self.candidate_metadata = {}

    def _embed(self, text):
        """Return a 1-D sentence embedding for ``text``.

        Uses the attention-masked mean of the token embeddings. This is the
        pooling the sentence-transformers model is meant to be used with; the
        raw first-token vector is not its sentence representation.
        """
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True)

        with torch.no_grad():
            outputs = self.model(**inputs)

        hidden = outputs.last_hidden_state
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        # clamp guards against division by zero for an all-masked input.
        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
        return pooled.squeeze(0)

    def add_candidate(self, entity_id, name, description):
        """Add a candidate entity to the knowledge base.

        Args:
            entity_id: Key under which the candidate is stored; adding the
                same id again overwrites the previous entry.
            name: Entity name.
            description: Short description; embedded together with the name.
        """
        self.candidate_embeddings[entity_id] = self._embed(f"{name} {description}")
        self.candidate_metadata[entity_id] = {
            "name": name,
            "description": description
        }

    def disambiguate(self, mention, context):
        """Find the best matching entity for a mention.

        Args:
            mention: The surface form to resolve.
            context: Surrounding text used to disambiguate the mention.

        Returns:
            A dict with ``entity_id``, ``score`` (cosine similarity) and
            ``metadata`` of the best candidate. When no candidates have been
            added, ``entity_id`` is ``None``, ``score`` is ``-1`` and
            ``metadata`` is empty.
        """
        if not self.candidate_embeddings:
            return {"entity_id": None, "score": -1, "metadata": {}}

        mention_embedding = self._embed(f"{mention}: {context}").unsqueeze(0)

        # Start below any attainable cosine value so that a candidate scoring
        # exactly -1 can still be selected.
        best_score = float("-inf")
        best_entity = None

        for entity_id, emb in self.candidate_embeddings.items():
            score = torch.nn.functional.cosine_similarity(
                mention_embedding,
                emb.unsqueeze(0)
            ).item()

            if score > best_score:
                best_score = score
                best_entity = entity_id

        return {
            "entity_id": best_entity,
            "score": best_score,
            "metadata": self.candidate_metadata.get(best_entity, {})
        }

disambiguator = EntityDisambiguator()

# Register the two competing "Apple" entities as candidates.
for candidate_id, candidate_name, candidate_description in (
    (
        "Q312",
        "Apple Inc.",
        "American multinational technology company that designs consumer electronics",
    ),
    (
        "Q16504",
        "Apple Records",
        "British record label founded by The Beatles",
    ),
):
    disambiguator.add_candidate(candidate_id, candidate_name, candidate_description)

# Resolve the mention using its sentence as context.
result = disambiguator.disambiguate(
    mention="Apple",
    context="Steve Jobs co-founded Apple in 1976",
)
print(result)
# Illustrative output:
# {'entity_id': 'Q312', 'score': 0.92, 'metadata': {'name': 'Apple Inc.', ...}}

Knowledge Base Integration

import requests

class KnowledgeBaseLinker:
    """Small client for looking up entities through a Wikidata-style HTTP API."""

    def __init__(self, kb_api="https://www.wikidata.org/w/api.php",
                 timeout=10, session=None):
        """Configure the client.

        Args:
            kb_api: URL of the API endpoint.
            timeout: Seconds to wait for each HTTP request before giving up.
                Without a timeout a stalled connection would block forever.
            session: Optional object exposing ``get(url, params=..., timeout=...)``
                (for example a ``requests.Session``) used for all requests;
                when omitted, the module-level ``requests.get`` is used.
        """
        self.api_url = kb_api
        self.timeout = timeout
        self.session = session
        # entity_id -> entity dict returned by a previous lookup
        self.cache = {}

    def _request(self, params):
        """Send a GET request with ``params`` and return the decoded JSON body.

        Raises whatever ``raise_for_status()`` raises on an HTTP error status,
        so an error page is never parsed as if it were a result.
        """
        http = self.session if self.session is not None else requests
        response = http.get(self.api_url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def search_entity(self, name, language="en"):
        """Search Wikidata for entity candidates.

        Args:
            name: Text to search for.
            language: Language code sent with the search.

        Returns:
            A list of up to five dicts with keys ``id``, ``label``,
            ``description`` and ``url``; missing optional fields are ``""``.
        """
        data = self._request({
            "action": "wbsearchentities",
            "search": name,
            "language": language,
            "format": "json",
            "limit": 5
        })

        return [{
            "id": item["id"],
            "label": item.get("label", ""),
            "description": item.get("description", ""),
            "url": item.get("concepturi", "")
        } for item in data.get("search", [])]

    def get_entity_details(self, entity_id):
        """Get detailed information about an entity.

        Successful lookups are cached per ``entity_id`` for the lifetime of
        this object.

        Returns:
            The entity dict from the response, or ``{}`` when the response
            contains no entry for ``entity_id``.
        """
        if entity_id in self.cache:
            return self.cache[entity_id]

        data = self._request({
            "action": "wbgetentities",
            "ids": entity_id,
            "format": "json",
            "props": "labels|descriptions|claims"
        })

        entity = data.get("entities", {}).get(entity_id, {})
        if entity:
            # Only cache real results; an empty answer may be transient and
            # should be retried on the next call.
            self.cache[entity_id] = entity
        return entity

kb = KnowledgeBaseLinker()

# Look up an ambiguous name and list every candidate the search returns.
search_term = "Python"
candidates = kb.search_entity(search_term)
for c in candidates:
    print(f"{c['id']}: {c['label']} - {c['description']}")
# Illustrative output:
# Q28865: Python - snake genus
# Q282201: Python - programming language
# Q189786: Python - Monty Python

Evaluation Metrics

| Metric | Description | Range |
|---|---|---|
| Entity Detection F1 | NER performance | 0-100 |
| Linking Accuracy | Correct entity links | 0-100 |
| End-to-End F1 | Full pipeline performance | 0-100 |
| Micro F1 | Per-mention evaluation | 0-100 |
| Macro F1 | Per-type evaluation | 0-100 |

Entity Linking Accuracy

Comparison of Approaches

| Approach | Method | Pros | Cons |
|---|---|---|---|
| Pipeline | NER → Disambiguation | Modular | Error propagation |
| Joint | End-to-end model | Better accuracy | Complex |
| Mention-based | Independent linking | Parallelizable | No context sharing |
| Document-level | Document context | Better disambiguation | Slower |

Entity Disambiguation Example

Entity linking performance is typically measured on benchmark datasets such as AIDA-CoNLL, MSNBC, and AQUAINT. State-of-the-art systems achieve over 90% accuracy on these standard benchmarks.

⭐

Premium Content

Named Entity Linking

Unlock this lesson and 900+ advanced tutorials with a Premium plan.

🎯 End-to-end Projects
💼 Interview Prep
📜 Certificates
🤝 Community Access

Already a member? Log in

Need Expert NLP Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement