Machine Translation
Machine translation (MT) automatically translates text from one language to another. Modern neural machine translation (NMT) uses encoder-decoder transformers with attention.
Seq2Seq Architecture
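Definition (Seq2Seq Model): $P(y \mid x) = \prod_{t=1}^{T} P(y_t \mid y_{<t}, x)$, where $x$ is the source sentence and $y = (y_1, \dots, y_T)$ is the target sentence.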
The encoder processes the source sentence, and the decoder generates the target sentence token by token.
import torch
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
)
# MarianMT checkpoint used for translation throughout the examples below
# (the "en-fr" suffix is the English -> French direction).
model_name = "Helsinki-NLP/opus-mt-en-fr"

# Model and tokenizer come from the same checkpoint name so that the
# vocabulary used for encoding/decoding matches the model's embeddings.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
def translate(text, mt_model=None, mt_tokenizer=None):
    """Translate ``text`` with a seq2seq translation model.

    Args:
        text: One source sentence (``str``) or a list/tuple of sentences,
            which are tokenized together as a single padded batch.
        mt_model: Object exposing ``generate(**inputs)``. Defaults to the
            module-level ``model``.
        mt_tokenizer: Tokenizer paired with ``mt_model``. When omitted, the
            module-level ``tokenizer`` is used for the default model; for an
            explicitly supplied model the tokenizer is loaded from
            ``mt_model.name_or_path`` so the vocabularies match. Pass it
            explicitly when calling in a loop to avoid reloading it.

    Returns:
        The translated string for a ``str`` input, or a list of translated
        strings (in input order) for a batch input.
    """
    if mt_tokenizer is None:
        if mt_model is None:
            mt_tokenizer = tokenizer
        else:
            # A different checkpoint generally ships a different vocabulary,
            # so the module-level tokenizer cannot be reused for it.
            mt_tokenizer = AutoTokenizer.from_pretrained(mt_model.name_or_path)
    if mt_model is None:
        mt_model = model

    inputs = mt_tokenizer(text, return_tensors="pt", padding=True)
    generated = mt_model.generate(**inputs)

    if isinstance(text, str):
        return mt_tokenizer.decode(generated[0], skip_special_tokens=True)
    # Batch input: decode every row instead of silently dropping all but
    # the first translation.
    return mt_tokenizer.batch_decode(generated, skip_special_tokens=True)
# Smoke-test the helper on a single English sentence.
greeting_fr = translate("Hello, how are you?")
print(greeting_fr)
# Sample output given in the original write-up: "Bonjour, comment allez-vous?"
BLEU Score
BLEU (Bilingual Evaluation Understudy) measures translation quality by comparing n-gram overlap with reference translations.
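Definition (BLEU Score): $\mathrm{BLEU} = \mathrm{BP} \cdot \exp\left(\sum_{n=1}^{N} w_n \log p_n\right)$
Definition (Modified N-gram Precision): $p_n = \dfrac{\sum_{g \in G_n(\hat{y})} \min\big(\mathrm{count}_{\hat{y}}(g),\ \max_{y \in R} \mathrm{count}_{y}(g)\big)}{\sum_{g \in G_n(\hat{y})} \mathrm{count}_{\hat{y}}(g)}$, where $G_n(\hat{y})$ is the set of distinct n-grams in the candidate $\hat{y}$ and $R$ is the set of reference translations.
Definition (Brevity Penalty): $\mathrm{BP} = \begin{cases} 1 & \text{if } c > r \\ e^{\,1 - r/c} & \text{if } c \le r \end{cases}$
Where:
- p_n: Modified n-gram precision
- w_n: Weight for n-gram order n (typically 1/N)
- c: Candidate translation length
- r: Reference translation length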
from nltk.translate.bleu_score import (
corpus_bleu,
SmoothingFunction,
sentence_bleu
)
# --- Sentence-level BLEU -------------------------------------------------
# One tokenized hypothesis scored against a list of tokenized references.
reference = [['the', 'cat', 'is', 'on', 'the', 'mat']]
candidate = ['the', 'cat', 'sat', 'on', 'the', 'mat']

# method1 smoothing replaces a zero n-gram match count with a small epsilon,
# so the score stays non-zero even though no candidate 4-gram appears in the
# reference here.
smooth = SmoothingFunction().method1
score = sentence_bleu(
    references=reference,
    hypothesis=candidate,
    smoothing_function=smooth,
)
# For this pair the n-gram precisions are 5/6, 3/5, 1/4 and a smoothed
# 0.1/3, giving roughly 0.2541.
print(f"BLEU: {score:.4f}")

# --- Corpus-level BLEU ---------------------------------------------------
# references[i] holds every reference for candidates[i]; both hypotheses
# below are identical to their reference, so the corpus score is 1.0000.
candidates = [
    ['the', 'cat', 'is', 'on', 'the', 'mat'],
    ['there', 'is', 'a', 'cat', 'on', 'the', 'mat'],
]
references = [
    [['the', 'cat', 'is', 'on', 'the', 'mat']],
    [['there', 'is', 'a', 'cat', 'on', 'the', 'mat']],
]
corpus_score = corpus_bleu(list_of_references=references, hypotheses=candidates)
print(f"Corpus BLEU: {corpus_score:.4f}")
Translation Evaluation Metrics
| Metric | Type | Measures | Range |
|---|---|---|---|
| BLEU | Precision | N-gram overlap | 0-100 |
| METEOR | Balance | Precision + recall + alignment | 0-1 |
| TER | Error | Edit distance / reference length | 0-∞ |
| CHRF | Character | Character n-gram F1 | 0-100 |
| COMET | Neural | Learned metric | -1 to 1 |
| BERTScore | Semantic | Embedding similarity | 0-1 |
import sacrebleu
from sacrebleu.metrics import BLEU

# One system output and its single reference translation.
references = ["Le chat est sur le tapis."]
candidate = "Le chat est sur le tapis."

# BLEU with sacrebleu.
# The metric object (sacrebleu >= 2.0 API) is used instead of the
# `sacrebleu.corpus_bleu` shortcut because only the metric object exposes the
# reproducibility signature; formatting the score object itself prints the
# score breakdown, not a signature.
# The second argument is a list of reference streams: one inner list per
# reference set, each aligned with the list of hypotheses.
bleu_metric = BLEU()
bleu = bleu_metric.corpus_score([candidate], [references])
print(f"BLEU: {bleu.score:.2f}")
print(f"Signature: {bleu_metric.get_signature()}")

# chrF (character n-gram F-score)
chrf = sacrebleu.corpus_chrf([candidate], [references])
print(f"chrF: {chrf.score:.2f}")

# TER (Translation Edit Rate); lower is better.
ter = sacrebleu.corpus_ter([candidate], [references])
print(f"TER: {ter.score:.2f}")
Beam Search for Translation
def translate_with_beam(source_text, model, tokenizer, num_beams=5):
    """Translate ``source_text`` using beam search of width ``num_beams``.

    Args:
        source_text: Text passed to ``tokenizer``.
        model: Object exposing ``generate(**kwargs)``.
        tokenizer: Callable tokenizer that also provides ``decode``.
        num_beams: Beam width; ``1`` means no beam search.

    Returns:
        The decoded first sequence returned by ``model.generate``, with
        special tokens removed.
    """
    inputs = tokenizer(source_text, return_tensors="pt", padding=True)

    generation_kwargs = {
        "num_beams": num_beams,
        "max_length": 128,
        "no_repeat_ngram_size": 3,
    }
    if num_beams > 1:
        # length_penalty and early_stopping only affect how beam hypotheses
        # are ranked/finished. Sending them with a single beam has no effect
        # on the output and makes recent transformers releases emit
        # "only used in beam-based generation modes" warnings.
        generation_kwargs["length_penalty"] = 0.6
        generation_kwargs["early_stopping"] = True

    outputs = model.generate(**inputs, **generation_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Decode the same sentence with progressively wider beams and print each
# result next to the beam width that produced it.
for beams in (1, 3, 5, 10):
    result = translate_with_beam("Hello world!", model, tokenizer, beams)
    print(f"Beams={beams}: {result}")
Quality Estimation
Quality estimation (QE) predicts translation quality without references.
from transformers import AutoModelForSequenceClassification
class TranslationQE:
    """Score a (source, translation) pair with a sequence-classification model.

    The model's logit is squashed with a sigmoid, so ``score`` returns a
    float in (0, 1). Requires ``torch`` to be imported at module level.
    """

    def __init__(self, model_name="Unbabel/wmt22-comet-da"):
        """Load the classifier and its tokenizer from ``model_name``.

        NOTE(review): the default name looks like a COMET release; those are
        normally loaded through the ``unbabel-comet`` package and the "-da"
        variant is reference-based. Confirm that this checkpoint loads via
        ``AutoModelForSequenceClassification`` and is the intended QE model.
        """
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def score(self, source, translation, reference=None):
        """Return the sigmoid of the model's logit for the sentence pair.

        Args:
            source: Source-language text (first tokenizer segment).
            translation: Candidate translation (second tokenizer segment).
            reference: Accepted for interface compatibility but not used;
                the score is computed from source and translation only.

        Returns:
            A Python float. ``.item()`` requires the logits tensor to hold
            exactly one element, i.e. one pair and a single output label.
        """
        encoded = self.tokenizer(
            source,
            translation,
            return_tensors="pt",
            truncation=True,
        )
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            logits = self.model(**encoded).logits
        return torch.sigmoid(logits).item()
# Score one English -> French pair with the default QE model; no reference
# translation is supplied.
qe = TranslationQE()
score = qe.score(
    source="The weather is nice today.",
    translation="Le temps est beau aujourd'hui.",
)
print(f"Quality Score: {score:.4f}")
BLEU Score Calculation
BLEU was originally designed for machine translation but has been adapted for text summarization, image captioning, and other generation tasks. It correlates well with human judgment at the corpus level but not for individual sentences.
Back-Translation
Back-translation generates synthetic parallel data from monolingual corpora.
def back_translation(
    monolingual_data,
    en_to_fr_model,
    fr_to_en_model,
    en_to_fr_tokenizer=None,
    fr_to_en_tokenizer=None,
):
    """Round-trip each text through two translation models.

    Every text is translated with ``en_to_fr_model`` and the result is
    translated back with ``fr_to_en_model``. Translation is done inside this
    function, with each model's own tokenizer, rather than through the
    single-argument module-level ``translate`` helper.

    Args:
        monolingual_data: Iterable of source texts.
        en_to_fr_model: Model exposing ``generate`` for the forward direction.
        fr_to_en_model: Model exposing ``generate`` for the reverse direction.
        en_to_fr_tokenizer: Tokenizer for ``en_to_fr_model``. When omitted it
            is loaded from ``en_to_fr_model.name_or_path``.
        fr_to_en_tokenizer: Tokenizer for ``fr_to_en_model``. When omitted it
            is loaded from ``fr_to_en_model.name_or_path``.

    Returns:
        A list of ``(original_text, round_trip_text)`` tuples, one per input
        text, in input order.

    NOTE(review): both tuple elements are in the original language. Classic
    back-translation pairs the synthetic translation with the original
    (i.e. ``(fr_text, text)``) -- confirm which pairing downstream training
    expects.
    """
    if en_to_fr_tokenizer is None:
        en_to_fr_tokenizer = AutoTokenizer.from_pretrained(en_to_fr_model.name_or_path)
    if fr_to_en_tokenizer is None:
        fr_to_en_tokenizer = AutoTokenizer.from_pretrained(fr_to_en_model.name_or_path)

    def _translate_with(text, mt_model, mt_tokenizer):
        # Encode, generate and decode one text with a matched model/tokenizer.
        inputs = mt_tokenizer(text, return_tensors="pt", padding=True)
        generated = mt_model.generate(**inputs)
        return mt_tokenizer.decode(generated[0], skip_special_tokens=True)

    synthetic_pairs = []
    for text in monolingual_data:
        # Translate to the pivot language.
        fr_text = _translate_with(text, en_to_fr_model, en_to_fr_tokenizer)
        # Translate back to the original language.
        en_text = _translate_with(fr_text, fr_to_en_model, fr_to_en_tokenizer)
        # Original text first, round-trip text second.
        synthetic_pairs.append((text, en_text))
    return synthetic_pairs