Memory Management & Garbage Collection in Python
Difficulty: Hard | Companies: Google, Meta, Amazon, Netflix, Stripe
Reference Counting
import sys
import ctypes
from typing import Any
class ReferenceCountDemo:
"""Demonstrate reference counting in Python."""
def __init__(self, value: Any):
self.value = value
print(f"Created object with value: {value}")
def __del__(self):
"""Destructor called when reference count reaches zero."""
print(f"Deleting object with value: {self.value}")
def reference_counting_basics():
"""Basic reference counting demonstration."""
# Create object - ref count = 1
obj = ReferenceCountDemo(42)
print(f"Reference count: {sys.getrefcount(obj) - 1}") # -1 for getrefcount argument
# Add reference - ref count = 2
alias = obj
print(f"Reference count after alias: {sys.getrefcount(obj) - 1}")
# Store in container - ref count = 3
container = [obj]
print(f"Reference count in container: {sys.getrefcount(obj) - 1}")
# Remove references
del alias
print(f"Reference count after del alias: {sys.getrefcount(obj) - 1}")
container.pop()
print(f"Reference count after remove: {sys.getrefcount(obj) - 1}")
# Object deleted when ref count = 0
del obj
# Circular reference detection
class Node:
"""Node class that can create circular references."""
def __init__(self, value: Any):
self.value = value
self.ref = None
def __del__(self):
print(f"Deleting Node with value: {self.value}")
def create_circular_reference():
"""Create circular reference that GC must handle."""
node1 = Node(1)
node2 = Node(2)
# Create circular reference
node1.ref = node2
node2.ref = node1
# Clear local references
del node1
del node2
# Objects not deleted yet due to circular reference
# GC will collect them later
βΉοΈ
Python uses reference counting as primary memory management, with generational garbage collection to handle circular references.
Garbage Collector Internals
import gc
import weakref
from typing import Any, Dict
class GCStats:
"""Helper to monitor garbage collection."""
@staticmethod
def get_stats() -> Dict:
"""Get current GC statistics."""
return {
'counts': gc.get_count(),
'threshold': gc.get_threshold(),
'garbage': len(gc.garbage),
'stats': gc.get_stats()
}
@staticmethod
def print_stats():
"""Print formatted GC statistics."""
stats = GCStats.get_stats()
print("=== GC Statistics ===")
print(f"Counts (gen0, gen1, gen2): {stats['counts']}")
print(f"Thresholds: {stats['threshold']}")
print(f"Garbage objects: {stats['garbage']}")
for i, gen_stat in enumerate(stats['stats']):
print(f"\nGeneration {i}:")
print(f" Collections: {gen_stat['collections']}")
print(f" Collected: {gen_stat['collected']}")
print(f" Uncollectable: {gen_stat['uncollectable']}")
class CircularRef:
"""Class demonstrating circular reference handling."""
def __init__(self, name: str):
self.name = name
self.partner = None
print(f"Created CircularRef: {name}")
def __repr__(self):
return f"CircularRef({self.name})"
def __del__(self):
print(f"Deleting CircularRef: {self.name}")
def demonstrate_gc():
"""Demonstrate garbage collection behavior."""
# Disable GC for manual control
gc.disable()
try:
# Create objects
a = CircularRef("A")
b = CircularRef("B")
# Create circular reference
a.partner = b
b.partner = a
# Remove local references
del a
del b
# Objects still exist due to circular reference
print(f"Garbage count: {len(gc.garbage)}")
# Manually run GC
collected = gc.collect()
print(f"Objects collected: {collected}")
finally:
gc.enable()
# Weak references
class WeakRefDemo:
"""Demonstrate weak references."""
def __init__(self, value: Any):
self.value = value
def __repr__(self):
return f"WeakRefDemo({self.value})"
def weak_reference_demo():
"""Demonstrate weak reference usage."""
obj = WeakRefDemo(42)
# Create weak reference
weak_ref = weakref.ref(obj)
print(f"Weak ref: {weak_ref()}") # Returns object
# Delete original object
del obj
# Weak reference now returns None
print(f"Weak ref after deletion: {weak_ref()}") # None
# WeakValueDictionary
def weak_value_dict_demo():
"""Demonstrate WeakValueDictionary."""
cache = weakref.WeakValueDictionary()
def get_or_create(key: str) -> WeakRefDemo:
if key not in cache:
cache[key] = WeakRefDemo(key)
return cache[key]
# Create and cache objects
obj1 = get_or_create("item1")
obj2 = get_or_create("item2")
print(f"Cache size: {len(cache)}") # 2
del obj1
import gc; gc.collect()
print(f"Cache size after del: {len(cache)}") # 1
β οΈ
Circular references prevent immediate deallocation. Use weak references for caches and observers to avoid memory leaks.
Memory Optimization Techniques
import sys
from typing import List, Any
import array
class MemoryOptimizationDemo:
"""Demonstrate memory optimization techniques."""
@staticmethod
def compare_memory_footprint():
"""Compare memory usage of different data structures."""
n = 1000000
# List vs array vs generator
list_data = [i for i in range(n)]
array_data = array.array('i', range(n))
generator_data = (i for i in range(n))
print(f"List: {sys.getsizeof(list_data)} bytes")
print(f"Array: {sys.getsizeof(array_data)} bytes")
print(f"Generator: {sys.getsizeof(generator_data)} bytes")
# Dictionary vs __slots__
class RegularClass:
def __init__(self, x, y, z):
self.x = x
self.y = y
self.z = z
class SlotClass:
__slots__ = ['x', 'y', 'z']
def __init__(self, x, y, z):
self.x = x
self.y = y
self.z = z
regular = RegularClass(1, 2, 3)
slotted = SlotClass(1, 2, 3)
print(f"Regular class: {sys.getsizeof(regular) + sys.getsizeof(regular.__dict__)} bytes")
print(f"Slotted class: {sys.getsizeof(slotted)} bytes")
class OptimizedDataStructure:
"""Memory-optimized data structure using __slots__."""
__slots__ = ['_data', '_index']
def __init__(self, data: List[Any]):
self._data = data
self._index = {}
self._build_index()
def _build_index(self):
"""Build index for fast lookups."""
for i, item in enumerate(self._data):
if item not in self._index:
self._index[item] = []
self._index[item].append(i)
def search(self, item: Any) -> List[int]:
"""Search using index - O(1) lookup."""
return self._index.get(item, [])
def __len__(self):
return len(self._data)
# Memory pool pattern
class MemoryPool:
"""Simple memory pool for object reuse."""
def __init__(self, factory, max_size: int = 100):
self.factory = factory
self.max_size = max_size
self.pool = []
def acquire(self):
"""Get object from pool or create new."""
if self.pool:
return self.pool.pop()
return self.factory()
def release(self, obj):
"""Return object to pool."""
if len(self.pool) < self.max_size:
self.pool.append(obj)
def __enter__(self):
return self.acquire()
def __exit__(self, exc_type, exc_val, exc_tb):
self.release(self)
return False
Memory Profiling
import tracemalloc
import linecache
from typing import Callable, Any
from functools import wraps
import time
class MemoryProfiler:
"""Memory profiling utilities."""
@staticmethod
def start():
"""Start memory tracing."""
tracemalloc.start()
@staticmethod
def get_stats():
"""Get current memory statistics."""
return tracemalloc.get_traced_memory()
@staticmethod
def print_stats():
"""Print formatted memory statistics."""
current, peak = tracemalloc.get_traced_memory()
print(f"Current memory: {current / 1024 / 1024:.2f} MB")
print(f"Peak memory: {peak / 1024 / 1024:.2f} MB")
@staticmethod
def take_snapshot():
"""Take memory snapshot."""
return tracemalloc.take_snapshot()
@staticmethod
def print_top_allocations(snapshot, top_n: int = 10):
"""Print top memory allocations."""
stats = snapshot.statistics('lineno')
print(f"\nTop {top_n} memory allocations:")
for i, stat in enumerate(stats[:top_n], 1):
print(f"{i}. {stat}")
def memory_profile(func: Callable) -> Callable:
"""Decorator to profile memory usage."""
@wraps(func)
def wrapper(*args, **kwargs):
tracemalloc.start()
start_time = time.perf_counter()
result = func(*args, **kwargs)
end_time = time.perf_counter()
current, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print(f"\n=== Memory Profile: {func.__name__} ===")
print(f"Execution time: {end_time - start_time:.4f}s")
print(f"Current memory: {current / 1024 / 1024:.2f} MB")
print(f"Peak memory: {peak / 1024 / 1024:.2f} MB")
return result
return wrapper
@memory_profile
def memory_intensive_operation():
"""Example memory-intensive operation."""
# Create large list
large_list = [i ** 2 for i in range(1000000)]
# Process data
result = sum(large_list)
return result
# Usage
tracemalloc.start()
snapshot1 = tracemalloc.take_snapshot()
# Perform operation
result = memory_intensive_operation()
snapshot2 = tracemalloc.take_snapshot()
stats = snapshot2.compare_to(snapshot1, 'lineno')
print("\nTop memory increases:")
for stat in stats[:5]:
print(stat)
Weak References and Callbacks
import weakref
from typing import Any, Callable, Dict, Optional
class Observable:
"""Observable pattern using weak references."""
def __init__(self):
self._observers: Dict[str, list] = {}
def add_observer(self, event: str, callback: Callable):
"""Add observer with weak reference."""
if event not in self._observers:
self._observers[event] = []
# Use weak reference to prevent memory leaks
ref = weakref.WeakMethod(callback, self._make_ref_callback(event))
self._observers[event].append(ref)
def _make_ref_callback(self, event: str) -> Callable:
"""Create callback for dead references."""
def callback(ref):
print(f"Observer for {event} was garbage collected")
return callback
def notify(self, event: str, *args, **kwargs):
"""Notify all observers of event."""
if event not in self._observers:
return
dead_refs = []
for ref in self._observers[event]:
callback = ref()
if callback is not None:
callback(*args, **kwargs)
else:
dead_refs.append(ref)
# Clean up dead references
for ref in dead_refs:
self._observers[event].remove(ref)
class CacheEntry:
"""Cache entry with expiration."""
def __init__(self, value: Any, ttl: float = 60.0):
import time
self.value = value
self.timestamp = time.time()
self.ttl = ttl
def is_expired(self) -> bool:
import time
return time.time() - self.timestamp > self.ttl
class TTLCache:
"""Cache with time-to-live using weak references."""
def __init__(self, maxsize: int = 128):
self.maxsize = maxsize
self._cache: Dict[str, CacheEntry] = {}
def get(self, key: str) -> Optional[Any]:
"""Get value from cache."""
if key in self._cache:
entry = self._cache[key]
if not entry.is_expired():
return entry.value
else:
del self._cache[key]
return None
def set(self, key: str, value: Any, ttl: float = 60.0):
"""Set value in cache."""
if len(self._cache) >= self.maxsize:
self._evict_expired()
if len(self._cache) >= self.maxsize:
self._evict_oldest()
self._cache[key] = CacheEntry(value, ttl)
def _evict_expired(self):
"""Remove expired entries."""
expired = [k for k, v in self._cache.items() if v.is_expired()]
for k in expired:
del self._cache[k]
def _evict_oldest(self):
"""Remove oldest entry."""
if self._cache:
oldest_key = min(self._cache.keys(), key=lambda k: self._cache[k].timestamp)
del self._cache[oldest_key]
# Usage
cache = TTLCache(maxsize=100)
cache.set("user:1", {"name": "John"}, ttl=30)
user = cache.get("user:1")
print(user) # {'name': 'John'}
βΉοΈ
Use
weakref for caches, observers, and any pattern where you want to avoid preventing garbage collection of referenced objects.Follow-Up Questions
-
Explain the difference between reference counting and garbage collection.
-
How does Python handle circular references?
-
What are the benefits of using
__slots__? -
How do you detect and fix memory leaks in Python?
-
Explain the generational garbage collection algorithm.