Descriptor Protocol: `__get__`, `__set__`, `__delete__`, Properties
Understanding Python's descriptor protocol and property system
Interview Question
"Explain Python's descriptor protocol. How do properties work internally? Implement custom descriptors for validation, caching, and computed attributes. What's the difference between data and non-data descriptors?"
Difficulty: Hard | Frequently asked at Google, Meta, Amazon
Theoretical Foundation
What is a Descriptor?
A descriptor is an object that defines at least one of __get__, __set__, or __delete__. Descriptors allow you to customize attribute access.
# Descriptor protocol methods
class Descriptor:
    """Skeleton listing every hook of the descriptor protocol; all are no-ops."""

    def __get__(self, obj, objtype=None):
        """Run on attribute read; ``obj`` is None for access through the class."""
        return None

    def __set__(self, obj, value):
        """Run on ``obj.attr = value``."""
        return None

    def __delete__(self, obj):
        """Run on ``del obj.attr``."""
        return None

    def __set_name__(self, owner, name):
        """Run once at class creation with the owning class and attribute name."""
        return None
ℹ️
Key Concept: Descriptors are the mechanism behind properties, class methods, static methods, and more.
Data vs Non-Data Descriptors
# Data descriptor: Defines __set__ or __delete__
class DataDescriptor:
    """Demo data descriptor: logs each hook and always reads back 42."""

    def __get__(self, obj, objtype=None):
        # Nothing is ever stored, so every read yields the same constant.
        message = "DataDescriptor.__get__"
        print(message)
        return 42

    def __set__(self, obj, value):
        # Only logs the assignment; the instance is left untouched.
        message = f"DataDescriptor.__set__({value})"
        print(message)

    def __delete__(self, obj):
        message = "DataDescriptor.__delete__"
        print(message)
# Non-data descriptor: Only defines __get__
class NonDataDescriptor:
    """Demo non-data descriptor: defines only ``__get__``, logs, and yields 42."""

    def __get__(self, obj, objtype=None):
        message = "NonDataDescriptor.__get__"
        print(message)
        return 42
class MyClass:
    """Host class exposing one data and one non-data descriptor."""

    data_desc = DataDescriptor()
    non_data_desc = NonDataDescriptor()


obj = MyClass()

# DataDescriptor defines __set__, so the assignment is routed to it and the
# following read goes through __get__ rather than the instance __dict__.
obj.data_desc = 100
print(f"data_desc: {obj.data_desc}")

# NonDataDescriptor defines only __get__, so this assignment creates an
# ordinary instance attribute that shadows the descriptor on later reads.
obj.non_data_desc = 100
print(f"non_data_desc: {obj.non_data_desc}")
Output:
DataDescriptor.__set__(100)
DataDescriptor.__get__
data_desc: 42
non_data_desc: 100
Properties
How Properties Work
# Property is implemented using descriptors
class Property:
    """Simplified pure-Python stand-in for the built-in ``property``.

    Wraps optional getter, setter and deleter callables and forwards
    attribute access on instances to them.  A missing accessor makes the
    corresponding operation raise ``AttributeError``.
    """

    def __init__(self, fget=None, fset=None, fdel=None, doc=None):
        self.fget, self.fset, self.fdel = fget, fset, fdel
        # Without an explicit (truthy) doc, borrow the getter's docstring.
        if not doc:
            doc = fget.__doc__ if fget else None
        self.__doc__ = doc

    def __get__(self, obj, objtype=None):
        # Access through the class returns the descriptor itself.
        if obj is None:
            return self
        reader = self.fget
        if reader is None:
            raise AttributeError("unreadable attribute")
        return reader(obj)

    def __set__(self, obj, value):
        writer = self.fset
        if writer is None:
            raise AttributeError("can't set attribute")
        writer(obj, value)

    def __delete__(self, obj):
        remover = self.fdel
        if remover is None:
            raise AttributeError("can't delete attribute")
        remover(obj)

    # Each decorator below builds a *new* descriptor of the same type with
    # one accessor swapped, leaving the current one unchanged.
    def getter(self, fget):
        cls = type(self)
        return cls(fget, self.fset, self.fdel, self.__doc__)

    def setter(self, fset):
        cls = type(self)
        return cls(self.fget, fset, self.fdel, self.__doc__)

    def deleter(self, fdel):
        cls = type(self)
        return cls(self.fget, self.fset, fdel, self.__doc__)
# Usage
class Circle:
    """Circle with a validated ``radius`` and a derived ``area``.

    Raises:
        ValueError: if a negative radius is supplied, either to the
            constructor or through later assignment.
    """

    def __init__(self, radius):
        # Assign through the property so the constructor applies the same
        # validation as later assignments; writing ``_radius`` directly
        # would let ``Circle(-1)`` slip past the setter's check.
        self.radius = radius

    @Property
    def radius(self):
        """The radius of the circle."""
        return self._radius

    @radius.setter
    def radius(self, value):
        if value < 0:
            raise ValueError("Radius cannot be negative")
        self._radius = value

    @Property
    def area(self):
        """Computed area property."""
        import math
        return math.pi * self._radius ** 2


# Usage: the area is recomputed on each read, so it follows the radius.
circle = Circle(5)
print(f"Radius: {circle.radius}")
print(f"Area: {circle.area:.2f}")
circle.radius = 10
print(f"New area: {circle.area:.2f}")
Output:
Radius: 5
Area: 78.54
New area: 314.16
💡
Interview Tip: Properties are data descriptors, which is why they take precedence over instance attributes.
Custom Descriptors
Validation Descriptor
class Validated:
    """Base class for descriptors that validate a value before storing it.

    The value is kept on the instance under ``_<name>``.  Subclasses
    implement ``validate`` to check, and optionally normalize, each value.
    """

    def __set_name__(self, owner, name):
        self.name, self.private_name = name, f'_{name}'

    def __get__(self, obj, objtype=None):
        # Class-level access returns the descriptor; an unset field reads as None.
        return self if obj is None else getattr(obj, self.private_name, None)

    def __set__(self, obj, value):
        checked = self.validate(value)
        setattr(obj, self.private_name, checked)

    def validate(self, value):
        """Return the value to store or raise; subclasses must override."""
        raise NotImplementedError
class PositiveNumber(Validated):
    """Validated field accepting only int/float values greater than zero."""

    def validate(self, value):
        # NOTE: bool is a subclass of int, so True also passes this check.
        is_number = isinstance(value, (int, float))
        if not is_number:
            raise TypeError(f"{self.name} must be a number")
        if value <= 0:
            raise ValueError(f"{self.name} must be positive")
        return value
class NonEmptyString(Validated):
    """Validated field for strings that are non-blank; stored stripped."""

    def validate(self, value):
        if not isinstance(value, str):
            raise TypeError(f"{self.name} must be a string")
        # Whitespace-only input counts as empty.
        trimmed = value.strip()
        if not trimmed:
            raise ValueError(f"{self.name} cannot be empty")
        return trimmed
class Email(Validated):
    """Validated field for e-mail-like strings; stored lower-cased."""

    def validate(self, value):
        if not isinstance(value, str):
            raise TypeError(f"{self.name} must be a string")
        # Only the presence of '@' is checked; this is not full address validation.
        has_at_sign = '@' in value
        if not has_at_sign:
            raise ValueError(f"{self.name} must be a valid email")
        return value.lower()
class User:
    """User record whose fields are validated on every assignment."""

    name = NonEmptyString()
    email = Email()
    age = PositiveNumber()

    def __init__(self, name, email, age):
        # Assigned in order name, email, age; each goes through its descriptor.
        self.name, self.email, self.age = name, email, age


# Usage
user = User("Alice", "alice@EXAMPLE.com", 30)
print(f"Name: {user.name}")
print(f"Email: {user.email}")
print(f"Age: {user.age}")

# Validation works: the first failing assignment (name) aborts construction.
try:
    bad_user = User("", "invalid", -5)
except (ValueError, TypeError) as exc:
    print(f"Error: {exc}")
Output:
Name: Alice
Email: alice@example.com
Age: 30
Error: name cannot be empty
Caching Descriptor
import time
from functools import wraps
class CachedProperty:
    """Descriptor that computes a value once per instance and then reuses it.

    The result is stored in the instance ``__dict__`` under the attribute's
    own name.  Only ``__get__`` is defined, so once the value is stored the
    instance attribute is found first on later reads.
    """

    def __init__(self, func):
        self.func = func
        self.attrname = None

    def __set_name__(self, owner, name):
        self.attrname = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        storage = obj.__dict__
        key = self.attrname
        if key in storage:
            return storage[key]
        # First access on this instance: compute, remember, return.
        result = self.func(obj)
        storage[key] = result
        return result
class TTLCache:
    """Time-to-live caching descriptor.

    Works both as a bare decorator (``@TTLCache``) and with options
    (``@TTLCache(ttl=5)``).  The computed value and the moment it was
    stored are kept on the instance under ``_cache_<name>`` and
    ``_cache_time_<name>``; a read older than ``ttl`` seconds recomputes.

    Args:
        func: Method that computes the value.  Omit it when passing
            options; it is then supplied by the decorator call.
        ttl: Lifetime of a cached value, in seconds.
    """

    def __init__(self, func=None, ttl=60):
        self.func = func
        self.ttl = ttl
        self.attrname = None

    def __call__(self, func):
        """Attach ``func`` when the descriptor was created with options only.

        This is what makes ``@TTLCache(ttl=5)`` work: the decorator syntax
        calls the freshly built descriptor with the decorated function.

        Raises:
            TypeError: if a function is already attached.
        """
        if self.func is not None:
            raise TypeError("TTLCache is already bound to a function")
        self.func = func
        return self

    def __set_name__(self, owner, name):
        self.attrname = name

    def __get__(self, obj, objtype=None):
        """Return the cached value if still fresh, otherwise recompute it.

        Raises:
            TypeError: if no function was ever attached.
        """
        if obj is None:
            return self
        if self.func is None:
            raise TypeError("TTLCache was created without a function to wrap")

        cache_key = f'_cache_{self.attrname}'
        cache_time_key = f'_cache_time_{self.attrname}'

        cached_at = getattr(obj, cache_time_key, None)
        if cached_at is not None and hasattr(obj, cache_key):
            # A monotonic clock keeps expiry immune to wall-clock adjustments.
            if time.monotonic() - cached_at < self.ttl:
                return getattr(obj, cache_key)

        # Missing or expired: compute, then stamp the time after the work is done.
        value = self.func(obj)
        setattr(obj, cache_key, value)
        setattr(obj, cache_time_key, time.monotonic())
        return value
class DataFetcher:
    """Holds a data sequence and exposes two cached derived attributes."""

    def __init__(self, data):
        self.data = data

    @CachedProperty
    def expensive_computation(self):
        """Expensive computation that should be cached."""
        print("Computing...")
        time.sleep(0.1)  # Simulate work
        total = sum(self.data)
        return total / len(self.data)

    # NOTE(review): this form needs TTLCache to accept options without a function.
    @TTLCache(ttl=5)
    def time_sensitive_data(self):
        """Data that expires after TTL."""
        print("Fetching time-sensitive data...")
        return {"timestamp": time.time()}


# Usage
fetcher = DataFetcher([1, 2, 3, 4, 5])

# The first read runs the computation; the second is served from the cache.
print(f"Average: {fetcher.expensive_computation}")
print(f"Average (cached): {fetcher.expensive_computation}")

# TTL cache: one second later the 5-second TTL has not elapsed yet.
print(f"Data 1: {fetcher.time_sensitive_data}")
time.sleep(1)
print(f"Data 2: {fetcher.time_sensitive_data}")  # Still cached
Output:
Computing...
Average: 3.0
Average (cached): 3.0
Fetching time-sensitive data...
Data 1: {'timestamp': 1234567890.123}
Data 2: {'timestamp': 1234567890.123}
ℹ️
Performance Tip: Caching descriptors can dramatically improve performance by avoiding redundant computations.
Advanced Patterns
Computed Attributes
class ComputedField:
    """Descriptor for computed attributes that are cached until inputs change.

    Works both as a bare decorator (``@ComputedField``) and with options
    (``@ComputedField(dependencies=['width', 'height'])``).  The value and
    a snapshot of the dependency values are stored on the instance under
    ``_computed_<name>`` and ``_deps_<name>``; the value is recomputed
    whenever the current dependency values differ from the snapshot.

    Args:
        func: Method that computes the value.  Omit it when passing
            options; it is then supplied by the decorator call.
        dependencies: Names of instance attributes the value depends on.

    Note:
        Dependencies are compared with ``==`` against the stored snapshot,
        so mutating a dependency object in place is not detected.
    """

    def __init__(self, func=None, dependencies=None):
        self.func = func
        # Copy so later changes to the caller's sequence cannot alter tracking.
        self.dependencies = list(dependencies) if dependencies else []
        self.attrname = None

    def __call__(self, func):
        """Attach ``func`` when the descriptor was created with options only.

        Raises:
            TypeError: if a function is already attached.
        """
        if self.func is not None:
            raise TypeError("ComputedField is already bound to a function")
        self.func = func
        return self

    def __set_name__(self, owner, name):
        self.attrname = name

    def _snapshot(self, obj):
        """Return the current values of all dependencies, in declared order."""
        return [getattr(obj, dep) for dep in self.dependencies]

    def __get__(self, obj, objtype=None):
        """Return the cached value if dependencies are unchanged, else recompute.

        Raises:
            TypeError: if no function was ever attached.
        """
        if obj is None:
            return self
        if self.func is None:
            raise TypeError("ComputedField was created without a function to wrap")

        cache_key = f'_computed_{self.attrname}'
        deps_key = f'_deps_{self.attrname}'

        if hasattr(obj, cache_key):
            if getattr(obj, deps_key, []) == self._snapshot(obj):
                return getattr(obj, cache_key)

        # Snapshot after computing, so it reflects the state the value was built from.
        value = self.func(obj)
        setattr(obj, cache_key, value)
        setattr(obj, deps_key, self._snapshot(obj))
        return value
class Rectangle:
    """Rectangle whose area and perimeter are cached against width/height."""

    def __init__(self, width, height):
        self.width, self.height = width, height

    # NOTE(review): this form needs ComputedField to accept options without a function.
    @ComputedField(dependencies=['width', 'height'])
    def area(self):
        """Computed area with dependency tracking."""
        print("Computing area...")
        return self.width * self.height

    @ComputedField(dependencies=['width', 'height'])
    def perimeter(self):
        """Computed perimeter with dependency tracking."""
        print("Computing perimeter...")
        side_sum = self.width + self.height
        return 2 * side_sum


# Usage
rect = Rectangle(5, 10)

# The first read of each field computes it.
print(f"Area: {rect.area}")
print(f"Perimeter: {rect.perimeter}")

# Nothing changed, so this read is served from the cache.
print(f"Area (cached): {rect.area}")

# Changing a dependency makes the stored snapshot stale, forcing a recompute.
rect.width = 7
print(f"Area (recomputed): {rect.area}")
Output:
Computing area...
Area: 50
Computing perimeter...
Perimeter: 30
Area (cached): 50
Computing area...
Area (recomputed): 70
Type Checking Descriptor
class TypeChecked:
    """Descriptor that enforces a type on assignment.

    Args:
        expected_type: Anything ``isinstance`` accepts as its second
            argument, i.e. a single type or a tuple of types.

    The value is stored on the instance under ``_<name>``; an attribute
    that was never assigned reads as ``None``.
    """

    def __init__(self, expected_type):
        self.expected_type = expected_type
        self.attrname = None

    def __set_name__(self, owner, name):
        self.attrname = name
        self.private_name = f'_{name}'

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        return getattr(obj, self.private_name, None)

    def __set__(self, obj, value):
        """Store ``value`` if it matches the expected type.

        Raises:
            TypeError: if ``value`` is not an instance of the expected type.
        """
        if not isinstance(value, self.expected_type):
            raise TypeError(
                f"{self.attrname} must be {self._type_label(self.expected_type)}, "
                f"got {type(value).__name__}"
            )
        setattr(obj, self.private_name, value)

    @staticmethod
    def _type_label(expected):
        """Return a readable name for a type or a tuple of types.

        A tuple has no ``__name__``, so reading it directly would turn a
        type mismatch into an AttributeError instead of the TypeError.
        """
        if isinstance(expected, tuple):
            return " or ".join(TypeChecked._type_label(item) for item in expected)
        return getattr(expected, "__name__", repr(expected))
class Person:
    """Person record whose attributes are type-checked on assignment."""

    name = TypeChecked(str)
    age = TypeChecked(int)
    email = TypeChecked(str)

    def __init__(self, name, age, email):
        # Assigned in order name, age, email; each goes through its descriptor.
        self.name, self.age, self.email = name, age, email


# Usage
person = Person("Alice", 30, "alice@example.com")
print(f"Name: {person.name}")

# Type checking works: a str is rejected for the int-typed field.
try:
    person.age = "thirty"  # Wrong type
except TypeError as exc:
    print(f"Error: {exc}")
Output:
Name: Alice
Error: age must be int, got str
Observable Descriptor
class Observable:
    """Descriptor that stores a value and notifies callbacks when it changes.

    Observers are registered on the descriptor, so they are shared by every
    instance of the owning class.  Each is called as
    ``observer(obj, attr_name, old_value, new_value)``.
    """

    def __init__(self, default=None):
        self.default = default
        self.attrname = None
        self.observers = []

    def __set_name__(self, owner, name):
        self.attrname, self.private_name = name, f'_{name}'

    def __get__(self, obj, objtype=None):
        # Unset attributes read as the configured default.
        return self if obj is None else getattr(obj, self.private_name, self.default)

    def __set__(self, obj, value):
        previous = getattr(obj, self.private_name, self.default)
        setattr(obj, self.private_name, value)

        # The value is stored first; observers only fire on an actual change.
        changed = previous != value
        if changed:
            for callback in self.observers:
                callback(obj, self.attrname, previous, value)

    def add_observer(self, observer):
        """Register a callback to run on every change."""
        self.observers.append(observer)

    def remove_observer(self, observer):
        """Unregister a callback; ``list.remove`` raises ValueError if absent."""
        self.observers.remove(observer)
# Usage
class ObservablePerson:
    """Person whose name and age report changes to registered observers."""

    name = Observable(default="Unknown")
    age = Observable(default=0)

    def __init__(self, name, age):
        self.name, self.age = name, age


# Observer function
def on_change(obj, attr_name, old_value, new_value):
    """Print a one-line description of an attribute change."""
    message = f"{attr_name} changed from {old_value} to {new_value}"
    print(message)


# The person is created before any observer is registered, so the initial
# assignments made in __init__ are not reported.
person = ObservablePerson("Alice", 30)
ObservablePerson.name.add_observer(on_change)
ObservablePerson.age.add_observer(on_change)

# Changes trigger notifications
person.name = "Bob"
person.age = 31
Output:
name changed from Alice to Bob
age changed from 30 to 31
💡
Design Pattern: Observable descriptors enable reactive programming patterns in Python.
Real-World Applications
SQLAlchemy-Style Fields
class Field:
    """Database field descriptor.

    Holds column metadata and stores the per-instance value under
    ``_db_<name>``.  Assigning ``None`` to a non-nullable field raises
    ``ValueError``; an unset field reads as ``None``.
    """

    def __init__(self, column_type, primary_key=False, nullable=True):
        self.name = None
        self.column_type = column_type
        self.primary_key = primary_key
        self.nullable = nullable

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        slot = f'_db_{self.name}'
        return getattr(obj, slot, None)

    def __set__(self, obj, value):
        rejects_null = not self.nullable
        if value is None and rejects_null:
            raise ValueError(f"{self.name} cannot be null")
        slot = f'_db_{self.name}'
        setattr(obj, slot, value)
class IntegerField(Field):
    """Field whose column type is fixed to ``INTEGER``."""

    def __init__(self, **kwargs):
        column_type = 'INTEGER'
        super().__init__(column_type, **kwargs)


class StringField(Field):
    """Field whose column type is ``VARCHAR(<max_length>)``."""

    def __init__(self, max_length=255, **kwargs):
        column_type = f'VARCHAR({max_length})'
        super().__init__(column_type, **kwargs)
class ModelMeta(type):
    """Metaclass that gathers ``Field`` descriptors into ``cls._fields``.

    ``_fields`` maps attribute name to ``Field`` instance.  It contains the
    fields inherited from base classes as well as those declared in the
    class body, so a subclass of a concrete model keeps its parent's
    columns.  A non-``Field`` attribute in the class body that reuses an
    inherited field's name removes that field from the mapping.
    """

    # The third parameter keeps its original name ``dict`` for signature
    # compatibility; it shadows the builtin only inside this method.
    def __new__(cls, name, bases, dict):
        fields = {}

        # Seed with inherited fields.  Iterating bases in reverse lets the
        # first-listed base win when two bases declare the same name.
        for base in reversed(bases):
            fields.update(getattr(base, '_fields', {}))

        for key, value in dict.items():
            if isinstance(value, Field):
                fields[key] = value
            else:
                # The subclass replaced an inherited field with a plain attribute.
                fields.pop(key, None)

        dict['_fields'] = fields
        return super().__new__(cls, name, bases, dict)
class Model(metaclass=ModelMeta):
    """Base database model."""

    def __init__(self, **kwargs):
        # Every declared field is assigned; one missing from kwargs gets None.
        for field_name in self._fields:
            setattr(self, field_name, kwargs.get(field_name))

    def save(self):
        """Simulate database save."""
        snapshot = {name: getattr(self, name) for name in self._fields}
        print(f"Saving {type(self).__name__}: {snapshot}")
class User(Model):
    """Example model with an integer key and two string columns."""

    id = IntegerField(primary_key=True)
    name = StringField(max_length=100)
    email = StringField(max_length=255, nullable=False)


# Usage: build a row from keyword arguments and print what would be saved.
user = User(id=1, name="Alice", email="alice@example.com")
user.save()
Output:
Saving User: {'id': 1, 'name': 'Alice', 'email': 'alice@example.com'}
Configuration System
class ConfigField:
    """Configuration field with a default, optional validation and env override.

    Lookup order on read:

    1. a value stored on the instance (set explicitly, or cached from a
       previous environment lookup),
    2. the environment variable ``env_var``, converted, validated and then
       cached on the instance,
    3. ``default``.

    Args:
        default: Value returned when nothing else is set.  Its type also
            guides how an environment string is converted.
        validator: Optional callable returning a truthy value for
            acceptable values.
        env_var: Optional name of the environment variable to consult.
    """

    def __init__(self, default=None, validator=None, env_var=None):
        self.default = default
        self.validator = validator
        self.env_var = env_var
        self.attrname = None

    def __set_name__(self, owner, name):
        self.attrname = name

    def __get__(self, obj, objtype=None):
        """Resolve the setting for ``obj``.

        Raises:
            ValueError: if the environment supplies a value the validator rejects.
        """
        if obj is None:
            return self

        # Check instance cache
        cache_key = f'_config_{self.attrname}'
        if hasattr(obj, cache_key):
            return getattr(obj, cache_key)

        # Check environment variable
        if self.env_var:
            import os
            env_value = os.environ.get(self.env_var)
            if env_value is not None:
                value = self._convert_type(env_value)
                # Environment input gets the same validation as assignment.
                if self.validator and not self.validator(value):
                    raise ValueError(
                        f"Invalid value for {self.attrname} "
                        f"from environment variable {self.env_var}"
                    )
                setattr(obj, cache_key, value)
                return value

        # Use default
        return self.default

    def __set__(self, obj, value):
        """Validate and store ``value`` on the instance.

        Raises:
            ValueError: if the validator rejects ``value``.
        """
        if self.validator and not self.validator(value):
            raise ValueError(f"Invalid value for {self.attrname}")
        setattr(obj, f'_config_{self.attrname}', value)

    def _convert_type(self, value):
        """Convert an environment string, guided by the type of ``default``.

        * ``str`` default: the string is returned unchanged, so a secret
          such as ``"12345"`` is not turned into a number.
        * non-bool numeric default: ``"0"`` and ``"1"`` are parsed as
          numbers rather than as booleans.
        * otherwise: boolean words first, then int, then float, then the
          raw string.
        """
        default = self.default
        if isinstance(default, str):
            return value

        lowered = value.lower()
        wants_number = isinstance(default, (int, float)) and not isinstance(default, bool)
        digit_flag = lowered in ('0', '1')
        if not (wants_number and digit_flag):
            if lowered in ('true', 'yes', '1'):
                return True
            if lowered in ('false', 'no', '0'):
                return False

        for caster in (int, float):
            try:
                return caster(value)
            except ValueError:
                continue
        return value
class AppConfig:
    """Application settings, each overridable through an APP_* environment variable."""

    DEBUG = ConfigField(default=False, env_var='APP_DEBUG')
    HOST = ConfigField(default='localhost', env_var='APP_HOST')
    PORT = ConfigField(default=8000, env_var='APP_PORT')
    SECRET_KEY = ConfigField(default='default-secret', env_var='APP_SECRET')

    def __init__(self):
        """No per-instance setup; values are resolved lazily by the fields."""


# Usage: these reads fall through to the defaults unless the matching
# APP_* variables are already present in the environment.
config = AppConfig()
print(f"DEBUG: {config.DEBUG}")
print(f"HOST: {config.HOST}")
print(f"PORT: {config.PORT}")

# Override with environment variable
import os
os.environ['APP_DEBUG'] = 'true'
config2 = AppConfig()
print(f"DEBUG (from env): {config2.DEBUG}")
Output:
DEBUG: False
HOST: localhost
PORT: 8000
DEBUG (from env): True
ℹ️
Framework Pattern: This pattern is used in Django settings, Flask configuration, and other frameworks.
Complexity Analysis
Performance Comparison
| Operation | Direct Attribute | Property | Custom Descriptor |
|---|---|---|---|
| Get | O(1) | O(1) | O(1) |
| Set | O(1) | O(1) | O(1) |
| Delete | O(1) | O(1) | O(1) |
| Memory | Minimal | Minimal | Minimal |
Memory Usage
import sys
class RegularClass:
    """Baseline: the value is a plain instance attribute."""

    def __init__(self):
        self.value = 42


class PropertyClass:
    """The value is exposed through a read-only built-in property."""

    def __init__(self):
        self._value = 42

    @property
    def value(self):
        return self._value


class DescriptorClass:
    """Stores ``_value`` only; it is not used by the comparison below."""

    def __init__(self):
        self._value = 42


class CustomDescriptor:
    """Non-data descriptor that reads ``_value`` from the instance."""

    def __get__(self, obj, objtype=None):
        return self if obj is None else obj._value


class WithDescriptor:
    """The value is exposed through ``CustomDescriptor``."""

    value = CustomDescriptor()

    def __init__(self):
        self._value = 42


# Compare memory.
# NOTE: sys.getsizeof is shallow; it reports the instance object only and
# does not follow references into the objects it holds.
regular = RegularClass()
prop = PropertyClass()
desc = WithDescriptor()

print(f"RegularClass: {sys.getsizeof(regular)} bytes")
print(f"PropertyClass: {sys.getsizeof(prop)} bytes")
print(f"WithDescriptor: {sys.getsizeof(desc)} bytes")
Interview Tips
Common Follow-up Questions
- "Why do properties take precedence over instance attributes?"
  - Properties are data descriptors
  - Data descriptors take precedence over the instance `__dict__` in attribute lookup
  - This is why `obj.prop = value` calls the setter instead of creating an instance attribute
- "How do class methods and static methods work internally?"
  - They're implemented as descriptors
  - The `classmethod` descriptor returns a method bound to the class
  - The `staticmethod` descriptor returns the raw function
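- "What's `__set_name__` used for?"
  - Called automatically when the owning class is created, once for each descriptor found in the class body
  - Allows the descriptor to know the attribute name it was assigned to
  - Essential when a class holds several instances of the same descriptor, because each needs its own storage key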
Code Review Tips
# BAD: Not using __set_name__
class BadDescriptor:
    """Anti-pattern: the attribute name has to be repeated at construction."""

    def __init__(self, name):
        self.name = name

    def __get__(self, obj, objtype=None):
        storage_name = "_{}".format(self.name)
        return getattr(obj, storage_name)
# GOOD: Using __set_name__
class GoodDescriptor:
    """Learns its attribute name from ``__set_name__`` instead of an argument."""

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, obj, objtype=None):
        storage_name = "_{}".format(self.name)
        return getattr(obj, storage_name)
# BAD: Not handling None obj
class BadDescriptor:
    """Anti-pattern: ``__get__`` assumes it is always reached via an instance."""

    def __get__(self, obj, objtype=None):
        # obj is None for class-level access, so this lookup fails there.
        return getattr(obj, '_value')  # Fails when accessed from class
# GOOD: Handling None obj
class GoodDescriptor:
    """Returns itself for class-level access and the stored value otherwise."""

    def __get__(self, obj, objtype=None):
        # obj is None when the attribute is looked up on the class.
        return self if obj is None else obj._value
⚠️
Common Mistake: Forgetting to handle obj is None in __get__ causes errors when accessing descriptor from class.
Summary
| Descriptor Type | Methods | Use Case |
|---|---|---|
| Data | `__set__` and/or `__delete__` (usually with `__get__`) | Properties, validation |
| Non-data | __get__ only | Methods, class methods |
| Custom | All methods | Advanced patterns |
Key Concepts
- Data descriptors take precedence over instance attributes
- Non-data descriptors can be overridden by instance attributes
- Properties are data descriptors
- `__set_name__` is called when the owning class is created, telling each descriptor the attribute name it was assigned to
- Descriptors are the foundation of Python's attribute access
ℹ️
Key Takeaway: Descriptors are powerful tools for customizing attribute access. Understanding them unlocks advanced Python patterns.
Practice Problems
- Validated Fields: Create a descriptor system for form validation
- Lazy Properties: Implement properties that compute only when accessed
- Change Observer: Build a descriptor that notifies on attribute changes
- Cached Methods: Create a method decorator that caches results
- Type Enforcer: Implement a descriptor that enforces type hints
Further Reading
- PEP 252: Making Types Look More Like Classes
- Python Docs: Descriptor HowTo Guide
- Books: "Python in a Nutshell" by Alex Martelli
- Advanced: `__slots__` and descriptors
Remember: Descriptors are the mechanism behind properties, methods, and more. Master them to understand Python's object model.