Recommender Systems with Deep Learning
Recommendation systems drive engagement across platforms. Move beyond matrix factorization to deep learning approaches that capture complex patterns.
Modern Recommendation Architecture
Neural Collaborative Filtering
import torch
import torch.nn as nn
import torch.nn.functional as F
class NeuralCF(nn.Module):
    """Neural collaborative filtering with a GMF path and an MLP path.

    The GMF path multiplies user and item embeddings element-wise; the MLP
    path concatenates a separate pair of embeddings and feeds them through
    a stack of Linear -> ReLU -> BatchNorm1d -> Dropout(0.2) layers. Both
    outputs are concatenated and projected to a single sigmoid score.

    Args:
        num_users: Number of rows in the user embedding tables.
        num_items: Number of rows in the item embedding tables.
        embed_dim: Width of every embedding table.
        hidden_dims: Sizes of the MLP hidden layers, in order. May be empty,
            in which case the MLP path passes the concatenated embeddings
            through unchanged.
    """

    def __init__(self, num_users, num_items, embed_dim=64, hidden_dims=(128, 64, 32)):
        super().__init__()
        # Immutable default (tuple) avoids sharing one list across instances;
        # lists passed by callers are still accepted.
        hidden_dims = tuple(hidden_dims)

        # Embedding layers: each path owns its own user/item tables.
        self.user_embed_gmf = nn.Embedding(num_users, embed_dim)
        self.item_embed_gmf = nn.Embedding(num_items, embed_dim)
        self.user_embed_mlp = nn.Embedding(num_users, embed_dim)
        self.item_embed_mlp = nn.Embedding(num_items, embed_dim)

        # MLP layers
        mlp_layers = []
        input_dim = embed_dim * 2
        for hidden_dim in hidden_dims:
            mlp_layers.extend([
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.BatchNorm1d(hidden_dim),
                nn.Dropout(0.2),
            ])
            input_dim = hidden_dim
        self.mlp = nn.Sequential(*mlp_layers)

        # Final prediction layer. `input_dim` now holds the MLP output width
        # (the last hidden size, or 2 * embed_dim when there are no hidden
        # layers), so this also works for an empty `hidden_dims`.
        self.output = nn.Linear(input_dim + embed_dim, 1)

    def forward(self, user_ids, item_ids):
        """Score user/item pairs.

        Args:
            user_ids: 1-D tensor of user indices, one per pair.
            item_ids: 1-D tensor of item indices, same length as `user_ids`.

        Returns:
            1-D tensor of sigmoid scores in (0, 1), one per pair. The batch
            dimension is kept even when the batch holds a single pair.
        """
        # GMF path
        user_gmf = self.user_embed_gmf(user_ids)
        item_gmf = self.item_embed_gmf(item_ids)
        gmf_out = user_gmf * item_gmf  # Element-wise product

        # MLP path
        user_mlp = self.user_embed_mlp(user_ids)
        item_mlp = self.item_embed_mlp(item_ids)
        mlp_input = torch.cat([user_mlp, item_mlp], dim=1)
        mlp_out = self.mlp(mlp_input)

        # Combine GMF and MLP
        concat = torch.cat([gmf_out, mlp_out], dim=1)
        output = self.output(concat)
        # Squeeze only the trailing singleton: a bare squeeze() would also
        # drop the batch dimension for a batch of one and break the shape
        # match with the labels.
        return torch.sigmoid(output).squeeze(-1)
# Training
# Model, optimiser and loss for binary interaction labels.
model = NeuralCF(num_users=10000, num_items=50000)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

# NOTE: `train_loader` is not defined in this snippet; it must yield
# (user_batch, item_batch, label_batch) triples.
for epoch in range(10):
    model.train()
    for user_batch, item_batch, label_batch in train_loader:
        # Clear stale gradients before computing this step's gradients.
        optimizer.zero_grad()
        pred = model(user_batch, item_batch)
        # BCELoss needs floating-point targets.
        loss = criterion(pred, label_batch.float())
        loss.backward()
        optimizer.step()
Deep Sequence Models
import torch
import torch.nn as nn
class SASRec(nn.Module):
    """Self-Attentive Sequential Recommendation.

    Encodes a sequence of item ids with learned item and position
    embeddings followed by a causally masked Transformer encoder.
    """

    def __init__(self, num_items, embed_dim=64, num_heads=4,
                 num_layers=2, max_seq_len=50, dropout=0.1):
        super().__init__()
        # One extra row because index 0 is the padding index.
        self.item_embed = nn.Embedding(num_items + 1, embed_dim, padding_idx=0)
        self.position_embed = nn.Embedding(max_seq_len, embed_dim)

        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embed_dim,
                nhead=num_heads,
                dim_feedforward=4 * embed_dim,
                dropout=dropout,
                batch_first=True,
            ),
            num_layers,
        )
        self.layer_norm = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, item_seq, mask=None):
        """Encode a batch of item sequences.

        Args:
            item_seq: Integer tensor of item ids; dimension 1 is the
                sequence axis.
            mask: Optional key-padding mask forwarded to the encoder as
                `src_key_padding_mask`.

        Returns:
            Layer-normalised encoder output, one vector per position.
        """
        length = item_seq.size(1)

        # Sum of item and position embeddings, then dropout.
        positions = torch.arange(length, device=item_seq.device).unsqueeze(0)
        hidden = self.dropout(self.item_embed(item_seq) + self.position_embed(positions))

        # True strictly above the diagonal: each position is barred from
        # attending to later positions.
        future = torch.ones(length, length, device=hidden.device).triu(diagonal=1).bool()

        encoded = self.transformer(hidden, mask=future, src_key_padding_mask=mask)
        return self.layer_norm(encoded)
# Next item prediction
def predict_next_item(model, item_seq, k=10):
    """Return the ids of the k highest-scoring next items for one sequence.

    Args:
        model: Sequence model that is callable on a batched id tensor and
            exposes an `item_embed` embedding whose weights are used to
            score every item. The model is switched to eval mode.
        item_seq: 1-D tensor of item ids for a single sequence.
        k: Number of items to return.

    Returns:
        A list of k item ids, best first. Always a list, even for k == 1.
    """
    model.eval()
    with torch.no_grad():
        seq_output = model(item_seq.unsqueeze(0))
        last_hidden = seq_output[:, -1, :]  # Last position

        # Score all items
        item_scores = torch.matmul(last_hidden, model.item_embed.weight.T)

        # The padding row is not a real item; exclude it so it can never be
        # recommended.
        padding_idx = getattr(model.item_embed, "padding_idx", None)
        if padding_idx is not None:
            item_scores[:, padding_idx] = float("-inf")

        # Get top-k
        _, top_items = torch.topk(item_scores, k)

    # Drop only the batch dimension; a bare squeeze() would turn a k == 1
    # result into a scalar instead of a one-element list.
    return top_items.squeeze(0).tolist()
Two-Tower Model
import torch
import torch.nn as nn
import torch.nn.functional as F
class TwoTowerModel(nn.Module):
    """Two-tower architecture for large-scale retrieval.

    Each tower embeds an id (64-wide), concatenates optional side features,
    and maps the result through an MLP to an L2-normalised vector of size
    `embed_dim`. `forward` returns a temperature-scaled contrastive loss.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embed_dim: Output width of both towers.
        user_features: Number of side-feature columns the user tower expects.
        item_features: Number of side-feature columns the item tower expects.
    """

    def __init__(self, num_users, num_items, embed_dim=128,
                 user_features=10, item_features=20):
        super().__init__()
        # Remember the feature widths so missing features can be zero-filled.
        self.num_user_features = user_features
        self.num_item_features = item_features

        # User tower
        self.user_embed = nn.Embedding(num_users, 64)
        self.user_tower = nn.Sequential(
            nn.Linear(64 + user_features, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, embed_dim),
        )

        # Item tower
        self.item_embed = nn.Embedding(num_items, 64)
        self.item_tower = nn.Sequential(
            nn.Linear(64 + item_features, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, embed_dim),
        )

        # Temperature parameter
        self.temperature = nn.Parameter(torch.tensor(0.07))

    @staticmethod
    def _with_features(id_emb, features, num_features):
        """Append side features to id embeddings, zero-filling when absent."""
        if features is None:
            # The tower's first Linear layer is sized for id + features, so
            # the input must keep that width even without features.
            features = id_emb.new_zeros(id_emb.size(0), num_features)
        return torch.cat([id_emb, features], dim=1)

    def encode_user(self, user_ids, user_features=None):
        """Return L2-normalised user vectors.

        Args:
            user_ids: 1-D tensor of user indices.
            user_features: Optional 2-D feature tensor, one row per user.
                When omitted, zeros are used in its place.
        """
        tower_input = self._with_features(
            self.user_embed(user_ids), user_features, self.num_user_features
        )
        return F.normalize(self.user_tower(tower_input), dim=1)

    def encode_item(self, item_ids, item_features=None):
        """Return L2-normalised item vectors.

        Args:
            item_ids: 1-D tensor of item indices.
            item_features: Optional 2-D feature tensor, one row per item.
                When omitted, zeros are used in its place.
        """
        tower_input = self._with_features(
            self.item_embed(item_ids), item_features, self.num_item_features
        )
        return F.normalize(self.item_tower(tower_input), dim=1)

    def forward(self, user_ids, pos_item_ids, neg_item_ids,
                user_features=None, pos_features=None, neg_features=None):
        """Compute the InfoNCE loss for a batch.

        Each user is scored against its own positive item and against every
        item in `neg_item_ids`.

        Returns:
            Scalar loss tensor.
        """
        user_emb = self.encode_user(user_ids, user_features)
        pos_emb = self.encode_item(pos_item_ids, pos_features)
        neg_emb = self.encode_item(neg_item_ids, neg_features)

        # InfoNCE loss: column 0 holds the positive score, the remaining
        # columns hold the scores against every negative.
        pos_scores = (user_emb * pos_emb).sum(dim=1, keepdim=True)
        neg_scores = torch.matmul(user_emb, neg_emb.T)
        logits = torch.cat([pos_scores, neg_scores], dim=1) / self.temperature

        # The correct class is always column 0 (the positive), so the
        # target is all zeros rather than the row index.
        labels = torch.zeros(logits.size(0), dtype=torch.long, device=logits.device)
        return F.cross_entropy(logits, labels)
# FAISS indexing for efficient retrieval
import faiss
import numpy as np
def build_faiss_index(item_embeddings, nprobe=10):
    """Build a FAISS IVF index for approximate cosine-similarity search.

    Args:
        item_embeddings: 2-D array-like of item vectors, one row per item.
            It is copied, so the caller's data is left untouched.
        nprobe: Number of inverted lists to visit at search time.

    Returns:
        A trained `faiss.IndexIVFFlat` holding the L2-normalised vectors.

    Raises:
        ValueError: If `item_embeddings` is not 2-D or contains no rows.
    """
    # FAISS works on C-contiguous float32 data. Copy explicitly so the
    # in-place normalisation below never modifies the caller's array.
    vectors = np.array(item_embeddings, dtype=np.float32, order="C", copy=True)
    if vectors.ndim != 2 or vectors.shape[0] == 0:
        raise ValueError("item_embeddings must be a non-empty 2-D array")

    num_items, dimension = vectors.shape

    # Use IVF for large datasets; sqrt(N) lists is the heuristic used here.
    nlist = max(1, int(np.sqrt(num_items)))
    quantizer = faiss.IndexFlatIP(dimension)
    index = faiss.IndexIVFFlat(quantizer, dimension, nlist, faiss.METRIC_INNER_PRODUCT)

    # Normalize for cosine similarity (inner product of unit vectors).
    faiss.normalize_L2(vectors)
    index.train(vectors)
    index.add(vectors)
    index.nprobe = nprobe
    return index
def retrieve_candidates(model, user_id, item_index, k=100, user_features=None):
    """Retrieve top-k candidate items for a user.

    Args:
        model: Module exposing `encode_user(user_ids, user_features)`.
        user_id: Integer id of the user to query for.
        item_index: Index object exposing `search(queries, k)`.
        k: Number of candidates to return.
        user_features: Optional feature tensor for this user (one row),
            forwarded to `model.encode_user`.

    Returns:
        A `(indices, distances)` pair of lists for the single query user.
    """
    # Build the id tensor on the model's device when it has parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None
    user_ids = torch.tensor([user_id], device=device)

    # Encode in eval mode without autograd: batch-norm layers cannot run in
    # training mode on a single row, and `.numpy()` is not allowed on a
    # tensor that requires grad. The previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            user_tensor = model.encode_user(user_ids, user_features)
    finally:
        model.train(was_training)

    # FAISS expects C-contiguous float32 on the host.
    user_emb = np.ascontiguousarray(user_tensor.detach().cpu().numpy(), dtype=np.float32)
    faiss.normalize_L2(user_emb)
    distances, indices = item_index.search(user_emb, k)
    return indices[0].tolist(), distances[0].tolist()
Feature Engineering for Recommendations
import pandas as pd
import numpy as np
class FeatureEngine:
    """Derive user, item and temporal features from an interaction log."""

    def __init__(self):
        # Placeholders; nothing in this class populates them yet.
        self.user_stats = {}
        self.item_stats = {}

    def compute_features(self, interactions_df):
        """Compute aggregate features from an interactions table.

        Args:
            interactions_df: DataFrame with `user_id`, `item_id`, `rating`
                and `timestamp` columns. It is not modified. `user_id` and
                `item_id` are used directly as row/column indices of the
                sparse matrix, and timestamps must support subtraction.

        Returns:
            Dict with:
                `user_features`: per-user interaction count, rating
                    mean/std and first/last timestamp.
                `item_features`: per-item popularity, rating mean/std and
                    lifetime (max - min timestamp).
                `interactions`: a copy of the input with added `hour` and
                    `dayofweek` columns.
                `user_item_matrix`: CSR matrix of ratings indexed by
                    (user_id, item_id).
        """
        from scipy.sparse import csr_matrix

        # Work on a copy so the caller's frame does not gain extra columns.
        interactions = interactions_df.copy()

        # User features (named aggregation fixes the output column names).
        user_features = interactions.groupby('user_id').agg(
            interaction_count=('item_id', 'count'),
            avg_rating=('rating', 'mean'),
            rating_std=('rating', 'std'),
            first_seen=('timestamp', 'min'),
            last_seen=('timestamp', 'max'),
        ).reset_index()

        # Item features
        item_features = interactions.groupby('item_id').agg(
            popularity=('user_id', 'count'),
            avg_rating=('rating', 'mean'),
            rating_std=('rating', 'std'),
            lifetime=('timestamp', lambda x: x.max() - x.min()),
        ).reset_index()

        # Temporal features; parse the timestamps once and reuse the result.
        timestamps = pd.to_datetime(interactions['timestamp'])
        interactions['hour'] = timestamps.dt.hour
        interactions['dayofweek'] = timestamps.dt.dayofweek

        # User-item interaction matrix for collaborative features
        user_item_matrix = csr_matrix(
            (interactions['rating'].values,
             (interactions['user_id'].values, interactions['item_id'].values))
        )

        return {
            'user_features': user_features,
            'item_features': item_features,
            'interactions': interactions,
            'user_item_matrix': user_item_matrix,
        }
Best Practices
- Use two-tower models for large-scale candidate generation
- Use sequential models to capture user intent over time
- Combine collaborative and content-based signals in hybrid approaches
- Evaluate with ranking metrics (NDCG, MAP), not just accuracy
- Handle cold start with content-based features and popularity fallbacks