πŸŽ‰ 75% of content is free forever β€” Unlock Premium from $10/mo β†’
CW
Search courses…
πŸ’Ό Servicesℹ️ Aboutβœ‰οΈ ContactView Pricing Plansfrom $10

Memory Management & Garbage Collection in Python

Python InterviewPython Internals⭐ Premium

Advertisement

Memory Management & Garbage Collection in Python

Difficulty: Hard | Companies: Google, Meta, Amazon, Netflix, Stripe

Reference Counting

import sys
import ctypes
from typing import Any

class ReferenceCountDemo:
    """Demonstrate reference counting in Python."""
    
    def __init__(self, value: Any):
        self.value = value
        print(f"Created object with value: {value}")
    
    def __del__(self):
        """Destructor called when reference count reaches zero."""
        print(f"Deleting object with value: {self.value}")

def reference_counting_basics():
    """Basic reference counting demonstration."""
    # Create object - ref count = 1
    obj = ReferenceCountDemo(42)
    print(f"Reference count: {sys.getrefcount(obj) - 1}")  # -1 for getrefcount argument
    
    # Add reference - ref count = 2
    alias = obj
    print(f"Reference count after alias: {sys.getrefcount(obj) - 1}")
    
    # Store in container - ref count = 3
    container = [obj]
    print(f"Reference count in container: {sys.getrefcount(obj) - 1}")
    
    # Remove references
    del alias
    print(f"Reference count after del alias: {sys.getrefcount(obj) - 1}")
    
    container.pop()
    print(f"Reference count after remove: {sys.getrefcount(obj) - 1}")
    
    # Object deleted when ref count = 0
    del obj

# Circular reference detection
class Node:
    """Node class that can create circular references."""
    
    def __init__(self, value: Any):
        self.value = value
        self.ref = None
    
    def __del__(self):
        print(f"Deleting Node with value: {self.value}")

def create_circular_reference():
    """Create circular reference that GC must handle."""
    node1 = Node(1)
    node2 = Node(2)
    
    # Create circular reference
    node1.ref = node2
    node2.ref = node1
    
    # Clear local references
    del node1
    del node2
    # Objects not deleted yet due to circular reference
    # GC will collect them later

ℹ️

Python uses reference counting as primary memory management, with generational garbage collection to handle circular references.

Garbage Collector Internals

import gc
import weakref
from typing import Any, Dict

class GCStats:
    """Helper to monitor garbage collection."""
    
    @staticmethod
    def get_stats() -> Dict:
        """Get current GC statistics."""
        return {
            'counts': gc.get_count(),
            'threshold': gc.get_threshold(),
            'garbage': len(gc.garbage),
            'stats': gc.get_stats()
        }
    
    @staticmethod
    def print_stats():
        """Print formatted GC statistics."""
        stats = GCStats.get_stats()
        print("=== GC Statistics ===")
        print(f"Counts (gen0, gen1, gen2): {stats['counts']}")
        print(f"Thresholds: {stats['threshold']}")
        print(f"Garbage objects: {stats['garbage']}")
        
        for i, gen_stat in enumerate(stats['stats']):
            print(f"\nGeneration {i}:")
            print(f"  Collections: {gen_stat['collections']}")
            print(f"  Collected: {gen_stat['collected']}")
            print(f"  Uncollectable: {gen_stat['uncollectable']}")

class CircularRef:
    """Class demonstrating circular reference handling."""
    
    def __init__(self, name: str):
        self.name = name
        self.partner = None
        print(f"Created CircularRef: {name}")
    
    def __repr__(self):
        return f"CircularRef({self.name})"
    
    def __del__(self):
        print(f"Deleting CircularRef: {self.name}")

def demonstrate_gc():
    """Demonstrate garbage collection behavior."""
    # Disable GC for manual control
    gc.disable()
    
    try:
        # Create objects
        a = CircularRef("A")
        b = CircularRef("B")
        
        # Create circular reference
        a.partner = b
        b.partner = a
        
        # Remove local references
        del a
        del b
        
        # Objects still exist due to circular reference
        print(f"Garbage count: {len(gc.garbage)}")
        
        # Manually run GC
        collected = gc.collect()
        print(f"Objects collected: {collected}")
        
    finally:
        gc.enable()

# Weak references
class WeakRefDemo:
    """Demonstrate weak references."""
    
    def __init__(self, value: Any):
        self.value = value
    
    def __repr__(self):
        return f"WeakRefDemo({self.value})"

def weak_reference_demo():
    """Demonstrate weak reference usage."""
    obj = WeakRefDemo(42)
    
    # Create weak reference
    weak_ref = weakref.ref(obj)
    print(f"Weak ref: {weak_ref()}")  # Returns object
    
    # Delete original object
    del obj
    
    # Weak reference now returns None
    print(f"Weak ref after deletion: {weak_ref()}")  # None

# WeakValueDictionary
def weak_value_dict_demo():
    """Demonstrate WeakValueDictionary."""
    cache = weakref.WeakValueDictionary()
    
    def get_or_create(key: str) -> WeakRefDemo:
        if key not in cache:
            cache[key] = WeakRefDemo(key)
        return cache[key]
    
    # Create and cache objects
    obj1 = get_or_create("item1")
    obj2 = get_or_create("item2")
    
    print(f"Cache size: {len(cache)}")  # 2
    
    del obj1
    import gc; gc.collect()
    
    print(f"Cache size after del: {len(cache)}")  # 1

⚠️

Circular references prevent immediate deallocation. Use weak references for caches and observers to avoid memory leaks.

Memory Optimization Techniques

import sys
from typing import List, Any
import array

class MemoryOptimizationDemo:
    """Demonstrate memory optimization techniques."""
    
    @staticmethod
    def compare_memory_footprint():
        """Compare memory usage of different data structures."""
        n = 1000000
        
        # List vs array vs generator
        list_data = [i for i in range(n)]
        array_data = array.array('i', range(n))
        generator_data = (i for i in range(n))
        
        print(f"List: {sys.getsizeof(list_data)} bytes")
        print(f"Array: {sys.getsizeof(array_data)} bytes")
        print(f"Generator: {sys.getsizeof(generator_data)} bytes")
        
        # Dictionary vs __slots__
        class RegularClass:
            def __init__(self, x, y, z):
                self.x = x
                self.y = y
                self.z = z
        
        class SlotClass:
            __slots__ = ['x', 'y', 'z']
            
            def __init__(self, x, y, z):
                self.x = x
                self.y = y
                self.z = z
        
        regular = RegularClass(1, 2, 3)
        slotted = SlotClass(1, 2, 3)
        
        print(f"Regular class: {sys.getsizeof(regular) + sys.getsizeof(regular.__dict__)} bytes")
        print(f"Slotted class: {sys.getsizeof(slotted)} bytes")

class OptimizedDataStructure:
    """Memory-optimized data structure using __slots__."""
    
    __slots__ = ['_data', '_index']
    
    def __init__(self, data: List[Any]):
        self._data = data
        self._index = {}
        self._build_index()
    
    def _build_index(self):
        """Build index for fast lookups."""
        for i, item in enumerate(self._data):
            if item not in self._index:
                self._index[item] = []
            self._index[item].append(i)
    
    def search(self, item: Any) -> List[int]:
        """Search using index - O(1) lookup."""
        return self._index.get(item, [])
    
    def __len__(self):
        return len(self._data)

# Memory pool pattern
class MemoryPool:
    """Simple memory pool for object reuse."""
    
    def __init__(self, factory, max_size: int = 100):
        self.factory = factory
        self.max_size = max_size
        self.pool = []
    
    def acquire(self):
        """Get object from pool or create new."""
        if self.pool:
            return self.pool.pop()
        return self.factory()
    
    def release(self, obj):
        """Return object to pool."""
        if len(self.pool) < self.max_size:
            self.pool.append(obj)
    
    def __enter__(self):
        return self.acquire()
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release(self)
        return False

Memory Profiling

import tracemalloc
import linecache
from typing import Callable, Any
from functools import wraps
import time

class MemoryProfiler:
    """Memory profiling utilities."""
    
    @staticmethod
    def start():
        """Start memory tracing."""
        tracemalloc.start()
    
    @staticmethod
    def get_stats():
        """Get current memory statistics."""
        return tracemalloc.get_traced_memory()
    
    @staticmethod
    def print_stats():
        """Print formatted memory statistics."""
        current, peak = tracemalloc.get_traced_memory()
        print(f"Current memory: {current / 1024 / 1024:.2f} MB")
        print(f"Peak memory: {peak / 1024 / 1024:.2f} MB")
    
    @staticmethod
    def take_snapshot():
        """Take memory snapshot."""
        return tracemalloc.take_snapshot()
    
    @staticmethod
    def print_top_allocations(snapshot, top_n: int = 10):
        """Print top memory allocations."""
        stats = snapshot.statistics('lineno')
        
        print(f"\nTop {top_n} memory allocations:")
        for i, stat in enumerate(stats[:top_n], 1):
            print(f"{i}. {stat}")

def memory_profile(func: Callable) -> Callable:
    """Decorator to profile memory usage."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        tracemalloc.start()
        
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        
        current, peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()
        
        print(f"\n=== Memory Profile: {func.__name__} ===")
        print(f"Execution time: {end_time - start_time:.4f}s")
        print(f"Current memory: {current / 1024 / 1024:.2f} MB")
        print(f"Peak memory: {peak / 1024 / 1024:.2f} MB")
        
        return result
    return wrapper

@memory_profile
def memory_intensive_operation():
    """Example memory-intensive operation."""
    # Create large list
    large_list = [i ** 2 for i in range(1000000)]
    
    # Process data
    result = sum(large_list)
    
    return result

# Usage
tracemalloc.start()
snapshot1 = tracemalloc.take_snapshot()

# Perform operation
result = memory_intensive_operation()

snapshot2 = tracemalloc.take_snapshot()
stats = snapshot2.compare_to(snapshot1, 'lineno')

print("\nTop memory increases:")
for stat in stats[:5]:
    print(stat)

Weak References and Callbacks

import weakref
from typing import Any, Callable, Dict, Optional

class Observable:
    """Observable pattern using weak references."""
    
    def __init__(self):
        self._observers: Dict[str, list] = {}
    
    def add_observer(self, event: str, callback: Callable):
        """Add observer with weak reference."""
        if event not in self._observers:
            self._observers[event] = []
        
        # Use weak reference to prevent memory leaks
        ref = weakref.WeakMethod(callback, self._make_ref_callback(event))
        self._observers[event].append(ref)
    
    def _make_ref_callback(self, event: str) -> Callable:
        """Create callback for dead references."""
        def callback(ref):
            print(f"Observer for {event} was garbage collected")
        return callback
    
    def notify(self, event: str, *args, **kwargs):
        """Notify all observers of event."""
        if event not in self._observers:
            return
        
        dead_refs = []
        for ref in self._observers[event]:
            callback = ref()
            if callback is not None:
                callback(*args, **kwargs)
            else:
                dead_refs.append(ref)
        
        # Clean up dead references
        for ref in dead_refs:
            self._observers[event].remove(ref)

class CacheEntry:
    """Cache entry with expiration."""
    
    def __init__(self, value: Any, ttl: float = 60.0):
        import time
        self.value = value
        self.timestamp = time.time()
        self.ttl = ttl
    
    def is_expired(self) -> bool:
        import time
        return time.time() - self.timestamp > self.ttl

class TTLCache:
    """Cache with time-to-live using weak references."""
    
    def __init__(self, maxsize: int = 128):
        self.maxsize = maxsize
        self._cache: Dict[str, CacheEntry] = {}
    
    def get(self, key: str) -> Optional[Any]:
        """Get value from cache."""
        if key in self._cache:
            entry = self._cache[key]
            if not entry.is_expired():
                return entry.value
            else:
                del self._cache[key]
        return None
    
    def set(self, key: str, value: Any, ttl: float = 60.0):
        """Set value in cache."""
        if len(self._cache) >= self.maxsize:
            self._evict_expired()
            if len(self._cache) >= self.maxsize:
                self._evict_oldest()
        
        self._cache[key] = CacheEntry(value, ttl)
    
    def _evict_expired(self):
        """Remove expired entries."""
        expired = [k for k, v in self._cache.items() if v.is_expired()]
        for k in expired:
            del self._cache[k]
    
    def _evict_oldest(self):
        """Remove oldest entry."""
        if self._cache:
            oldest_key = min(self._cache.keys(), key=lambda k: self._cache[k].timestamp)
            del self._cache[oldest_key]

# Usage
cache = TTLCache(maxsize=100)
cache.set("user:1", {"name": "John"}, ttl=30)
user = cache.get("user:1")
print(user)  # {'name': 'John'}

ℹ️

Use weakref for caches, observers, and any pattern where you want to avoid preventing garbage collection of referenced objects.

Follow-Up Questions

  1. Explain the difference between reference counting and garbage collection.

  2. How does Python handle circular references?

  3. What are the benefits of using __slots__?

  4. How do you detect and fix memory leaks in Python?

  5. Explain the generational garbage collection algorithm.

Advertisement