Python JSON — Serialization & Data Exchange

JSON (JavaScript Object Notation) is the standard data interchange format. Python's json module handles encoding and decoding.

Learning Objectives

Serialize Python objects to JSON strings
Deserialize JSON to Python objects
Customize JSON serialization for complex types
Handle large JSON files with streaming
Work with nested JSON structures efficiently
Use high-performance JSON libraries

Basic Encoding and Decoding

import json

# Python dict to JSON string
data = {
    "name": "Alice",
    "age": 30,
    "scores": [95, 87, 91],
    "active": True,
    "address": None
}

# indent=2 pretty-prints: every object member AND every array element
# is placed on its own line
json_string = json.dumps(data, indent=2)
print(json_string)
# {
#   "name": "Alice",
#   "age": 30,
#   "scores": [
#     95,
#     87,
#     91
#   ],
#   "active": true,
#   "address": null
# }

# JSON string to Python dict
parsed = json.loads(json_string)
print(parsed["name"])  # Alice
print(type(parsed["age"]))  # <class 'int'>
print(type(parsed["active"]))  # <class 'bool'>

Type Mapping

JSON Type	Python Type	Example
object	dict	`{"a": 1}` -> `{"a": 1}`
array	list	`[1, 2]` -> `[1, 2]`
string	str	`"hello"` -> `"hello"`
number (int)	int	`42` -> `42`
number (float)	float	`3.14` -> `3.14`
true	True	`true` -> `True`
false	False	`false` -> `False`
null	None	`null` -> `None`

File Operations

import json

# Write JSON to file
data = {"users": [{"name": "Alice"}, {"name": "Bob"}]}
# NOTE: no encoding= is passed to open(), so the platform's default text
# encoding applies to every file opened in this listing
with open('data.json', 'w') as f:
    json.dump(data, f, indent=2)

# Read JSON from file
with open('data.json', 'r') as f:
    loaded = json.load(f)

# Write compact JSON: separators=(',', ':') omits the space after ',' and ':'
with open('data_compact.json', 'w') as f:
    json.dump(data, f, separators=(',', ':'))

# Handle JSONL (JSON Lines) files
def write_jsonl(filename, records):
    """Write each record to `filename` as one JSON document per line.

    Args:
        filename: Path of the file to create or overwrite.
        records: Iterable of JSON-serializable values.
    """
    with open(filename, 'w') as out:
        # Records are serialized lazily, one line at a time
        out.writelines(json.dumps(entry) + '\n' for entry in records)

def read_jsonl(filename):
    """Lazily yield the decoded value of every non-blank line in `filename`.

    Args:
        filename: Path of a JSON Lines file.

    Yields:
        The Python object decoded from each non-blank line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(filename, 'r') as src:
        for raw in src:
            if not raw.strip():
                continue  # blank or whitespace-only line
            yield json.loads(raw)

Custom Serialization

import json
from datetime import datetime, date
from decimal import Decimal

class CustomEncoder(json.JSONEncoder):
    """JSONEncoder that also accepts dates, Decimals, sets, bytes and plain objects.

    Conversions applied by `default`, in this order:
      * datetime / date -> ISO 8601 string (`isoformat()`)
      * Decimal         -> float
      * set             -> list
      * bytes           -> str, decoded as UTF-8 with undecodable bytes replaced
      * any object with a `__dict__` -> that attribute dict
    Anything else is passed to the base class, which raises TypeError.
    """

    # (accepted types, converter) pairs, tried top to bottom
    _CONVERTERS = (
        ((datetime, date), lambda value: value.isoformat()),
        (Decimal, float),
        (set, list),
        (bytes, lambda value: value.decode('utf-8', errors='replace')),
    )

    def default(self, obj):
        """Return a JSON-serializable stand-in for `obj`."""
        for kinds, convert in self._CONVERTERS:
            if isinstance(obj, kinds):
                return convert(obj)
        if hasattr(obj, '__dict__'):
            return vars(obj)
        return super().default(obj)

# Sample payload mixing natively serializable values ("event", "attendees")
# with types the stock encoder rejects (datetime, set, Decimal)
data = {
    "event": "Conference",
    "date": datetime.now(),
    "tags": {"python", "tech"},  # set iteration order is arbitrary, so the tag order in the output may vary
    "price": Decimal("29.99"),
    "attendees": 500
}

# cls= selects the encoder class whose default() handles the extra types
json_str = json.dumps(data, cls=CustomEncoder, indent=2)
print(json_str)

# Simple default function
def default_handler(obj):
    """Fallback converter for `json.dumps(..., default=default_handler)`.

    Handles the non-JSON types used in this listing's sample data:
    datetime/date become ISO 8601 strings, Decimal becomes float and
    set becomes list.

    Args:
        obj: The object json.dumps could not serialize on its own.

    Returns:
        A JSON-serializable replacement for `obj`.

    Raises:
        TypeError: If `obj` is of a type this handler does not know.
    """
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        # float may lose precision; use str(obj) instead if exact digits matter
        return float(obj)
    if isinstance(obj, set):
        return list(obj)
    # Same wording as the standard library's own error
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

# Use default parameter instead of custom class
json_str = json.dumps(data, default=default_handler, indent=2)

Working with Nested JSON

import json

# Access nested data safely
api_response = {
    "status": "success",
    "data": {
        "users": [
            {"name": "Alice", "contacts": {"email": "alice@example.com"}},
            {"name": "Bob", "contacts": {"email": "bob@example.com"}}
        ]
    }
}

# Safe nested access
def safe_get(data, *keys, default=None):
    """Follow `keys` through nested dicts, returning `default` on any miss.

    Args:
        data: The outermost dict (any other type yields `default` as soon
            as a key has to be looked up in it).
        *keys: Keys to follow, outermost first.
        default: Value returned when a key is absent or an intermediate
            value is not a dict.

    Returns:
        The value found at the end of the key path, or `default`. A key that
        is present with the value None returns None, not `default`.
    """
    # Private sentinel so that a stored None is distinguishable from "absent"
    missing = object()
    current = data
    for key in keys:
        if not isinstance(current, dict):
            return default
        current = current.get(key, missing)
        if current is missing:
            # Stop here: never continue the lookup inside `default` itself
            return default
    return current

emails = [
    safe_get(user, "contacts", "email")
    for user in safe_get(api_response, "data", "users", default=[])
]
print(emails)  # ['alice@example.com', 'bob@example.com']

# Flatten nested JSON
def flatten_json(data, prefix=''):
    """Flatten nested dicts and lists into a single dict with dotted keys.

    Dict keys and list indexes are joined with '.', e.g.
    {"a": {"b": 1}, "e": [3, 4]} -> {"a.b": 1, "e.0": 3, "e.1": 4}.
    Lists are flattened at any depth, including lists nested inside lists.
    Empty dicts and empty lists are kept as values so that no key disappears.

    Args:
        data: The dict to flatten.
        prefix: Key path prepended to every generated key.

    Returns:
        A new flat dict; `data` is not modified.
    """
    items = {}

    def walk(value, path):
        """Record `value` under `path`, descending into non-empty containers."""
        if isinstance(value, dict) and value:
            for key, child in value.items():
                walk(child, f"{path}.{key}" if path else key)
        elif isinstance(value, list) and value:
            for index, child in enumerate(value):
                walk(child, f"{path}.{index}")
        else:
            # Scalars, plus empty containers (nothing to descend into)
            items[path] = value

    for key, value in data.items():
        walk(value, f"{prefix}.{key}" if prefix else key)
    return items

nested = {"a": {"b": 1, "c": {"d": 2}}, "e": [3, 4, 5]}
flat = flatten_json(nested)
print(flat)
# {'a.b': 1, 'a.c.d': 2, 'e.0': 3, 'e.1': 4, 'e.2': 5}

Advanced Options

import json

data = {"name": "Alice", "age": 30, "city": "NYC"}

# Compact output: separators=(',', ':') removes the space that json.dumps
# otherwise writes after each ',' and ':'
compact = json.dumps(data, separators=(',', ':'))
# '{"name":"Alice","age":30,"city":"NYC"}'

# Sort keys: sort_keys=True emits object members ordered by key
sorted_json = json.dumps(data, sort_keys=True, indent=2)
# {
#   "age": 30,
#   "city": "NYC",
#   "name": "Alice"
# }

# Handle non-serializable objects
def handle_non_serializable(obj):
    """Catch-all `default=` hook: replace any unsupported object with str(obj).

    Convenient for logging and debugging, but the original type is lost and
    nothing signals that a value was stringified.
    """
    return str(obj)

# A set is not JSON serializable, so json.dumps hands it to the hook
json.dumps({"tags": {"python"}}, default=handle_non_serializable)
# '{"tags": "{\'python\'}"}'

# Parse with object_hook for custom decoding
def as_complex(dct):
    """object_hook: turn a dict tagged with '__complex__' into a complex number.

    Args:
        dct: A dict decoded from one JSON object.

    Returns:
        complex(dct['real'], dct['imag']) when the '__complex__' key is
        present, otherwise `dct` unchanged.
    """
    is_tagged = '__complex__' in dct
    return complex(dct['real'], dct['imag']) if is_tagged else dct

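# object_pairs_hook receives each object's (key, value) pairs in document order.
# dict is used here only for illustration: plain dicts already keep that order (Python 3.7+).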
ordered = json.loads('{"b": 2, "a": 1}', object_pairs_hook=dict)

# Check if string is valid JSON
def is_valid_json(string):
    """Return True if `string` parses as a JSON document, False otherwise.

    Args:
        string: Candidate JSON text (str, bytes or bytearray). Any other
            type is reported as invalid rather than raising.

    Returns:
        bool: Whether json.loads accepted the input.
    """
    try:
        json.loads(string)
    except (ValueError, TypeError, RecursionError):
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # raised for bytes that are not valid UTF-8/16/32; RecursionError
        # covers documents nested too deeply for the parser.
        return False
    return True

print(is_valid_json('{"valid": true}'))  # True
print(is_valid_json('not json'))          # False

JSON Lines (JSONL)

import json

# Write JSONL — one JSON object per line
def write_jsonl(filename, records):
    """Serialize `records` to `filename` in JSON Lines format.

    Args:
        filename: Path of the file to create or overwrite.
        records: Iterable of JSON-serializable values; each becomes one line.
    """
    with open(filename, 'w') as sink:
        for item in records:
            # print() supplies the trailing newline that ends each record
            print(json.dumps(item), file=sink)

# Read JSONL — memory efficient for large files
def read_jsonl(filename):
    """Yield the records of a JSON Lines file one at a time.

    Blank lines are ignored. A line that is not valid JSON is reported with
    print() (including its 1-based line number) and skipped.

    Args:
        filename: Path of the JSON Lines file.

    Yields:
        The Python object decoded from each valid, non-blank line.
    """
    with open(filename, 'r') as src:
        for number, raw in enumerate(src, start=1):
            text = raw.strip()
            if not text:
                continue
            try:
                record = json.loads(text)
            except json.JSONDecodeError as e:
                print(f"Error on line {number}: {e}")
                continue
            yield record

# Process large JSONL file
def filter_jsonl(input_file, output_file, predicate):
    """Copy the records of one JSON Lines file that satisfy `predicate` to another.

    Records are processed one line at a time; blank lines are skipped.

    Args:
        input_file: Path of the JSON Lines file to read.
        output_file: Path of the JSON Lines file to create or overwrite.
        predicate: Callable taking a decoded record; truthy means "keep".

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    with open(input_file, 'r') as source, open(output_file, 'w') as sink:
        non_blank = (raw for raw in source if raw.strip())
        for entry in map(json.loads, non_blank):
            if not predicate(entry):
                continue
            sink.write(json.dumps(entry) + '\n')

# Convert JSONL to CSV
import csv

def jsonl_to_csv(jsonl_file, csv_file):
    """Convert a JSON Lines file of objects into a CSV file with a header row.

    The header is the union of the keys of all records, in first-seen order,
    so records may have differing keys; a record lacking a column gets an
    empty cell. All records are held in memory while converting. The output
    file is created (empty) even when the input has no records.

    Args:
        jsonl_file: Path of the JSON Lines file; every non-blank line must
            be a JSON object.
        csv_file: Path of the CSV file to create or overwrite.
    """
    with open(jsonl_file) as fin, open(csv_file, 'w', newline='') as fout:
        records = [json.loads(line) for line in fin if line.strip()]
        if not records:
            return
        # dict.fromkeys de-duplicates while preserving first-seen order
        fieldnames = list(
            dict.fromkeys(key for record in records for key in record)
        )
        writer = csv.DictWriter(fout, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(records)

Streaming Large JSON

import json

# Write large JSON arrays incrementally
def write_large_json(filename, items):
    """Write `items` to `filename` as a JSON array, one element at a time.

    Only one element is serialized at a time, and `items` may be any
    iterable, including a generator of unknown length.

    Args:
        filename: Path of the file to create or overwrite.
        items: Iterable of JSON-serializable values.
    """
    with open(filename, 'w') as f:
        f.write('[\n')
        for i, item in enumerate(items):
            if i:
                # Separator goes BEFORE every element except the first, so the
                # total number of items never needs to be known
                f.write(',\n')
            json.dump(item, f)
        f.write('\n]')

# Read large JSON files with ijson (third-party)
# pip install ijson
# import ijson
# with open('large.json', 'rb') as f:
#     for record in ijson.items(f, 'item'):
#         process(record)

# Streaming JSON parser for arrays
def stream_json_array(filename, chunk_size=65536):
    """Yield the elements of a top-level JSON array without loading the whole file.

    The file is read `chunk_size` characters at a time; only the text of the
    element currently being decoded (plus one chunk) is kept in memory.
    Elements may be of any JSON type. The file stays open until the generator
    is exhausted or closed. An element larger than `chunk_size` is re-parsed
    each time another chunk is appended, so pick a chunk size comfortably
    larger than a typical element.

    Args:
        filename: Path of a file whose content is a JSON array.
        chunk_size: Number of characters to read per I/O call.

    Yields:
        Each array element, decoded, in document order. Iteration stops
        silently at the closing ']' or at the first malformed element.

    Raises:
        ValueError: If the file contains no '[' or `chunk_size` is not positive.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    decoder = json.JSONDecoder()

    with open(filename, 'r') as f:
        buffer = ''   # text read but not yet fully consumed
        pos = 0       # index in `buffer` of the next unread character
        eof = False

        def refill():
            """Discard consumed text and append the next chunk (sets eof at end)."""
            nonlocal buffer, pos, eof
            chunk = f.read(chunk_size)
            if chunk:
                buffer = buffer[pos:] + chunk
                pos = 0
            else:
                eof = True

        # Locate the opening bracket of the array
        while True:
            bracket = buffer.find('[', pos)
            if bracket != -1:
                pos = bracket + 1
                break
            pos = len(buffer)  # nothing of interest so far; let refill drop it
            refill()
            if eof:
                raise ValueError("no '[' found: file does not contain a JSON array")

        while True:
            # Skip whitespace and element separators
            while pos < len(buffer) and buffer[pos] in ' \t\r\n,':
                pos += 1
            if pos == len(buffer):
                if eof:
                    return
                refill()
                continue
            if buffer[pos] == ']':
                return

            try:
                obj, end = decoder.raw_decode(buffer, pos)
            except json.JSONDecodeError:
                if eof:
                    return  # malformed or truncated element: stop quietly
                refill()    # element is probably cut off by the chunk boundary
                continue

            # A number such as 12.5 can decode "successfully" as 12 when the
            # chunk ends mid-token, so accept a value only once a delimiter
            # (or the end of the file) follows it.
            if not eof and (end == len(buffer) or buffer[end] not in ' \t\r\n,]'):
                refill()
                continue

            yield obj
            pos = end

Real-World: API Response Processing

import json
from datetime import datetime

class APIResponseProcessor:
    """Parse JSON API responses of the form {"status": ..., "data": {...}}.

    Failures are recorded as messages in `self.errors` instead of being
    raised; the processing methods then return None.
    """

    def __init__(self):
        self.cache = {}   # not read or written by any method of this class
        self.errors = []  # human-readable messages, oldest first

    def process_response(self, response_text):
        """Decode a response and return its transformed "data" payload.

        Args:
            response_text: The raw JSON text of the response.

        Returns:
            The transformed payload dict, or None if the text is not valid
            JSON, is not a JSON object, does not have status "success", or
            carries a "data" member that is not an object. Each of those
            cases appends a message to `self.errors`. A missing or null
            "data" member yields an empty dict.
        """
        try:
            data = json.loads(response_text)
        except json.JSONDecodeError as e:
            self.errors.append(f"Parse error: {e}")
            return None

        # Valid JSON need not be an object (e.g. "[1, 2]" or "null")
        if not isinstance(data, dict):
            self.errors.append(
                f"Parse error: expected a JSON object, got {type(data).__name__}"
            )
            return None

        if data.get('status') != 'success':
            self.errors.append(f"API error: {data.get('message', 'Unknown')}")
            return None

        payload = data.get('data')
        if payload is None:
            payload = {}
        if not isinstance(payload, dict):
            self.errors.append(
                f"API error: expected 'data' to be an object, got {type(payload).__name__}"
            )
            return None

        return self._transform(payload)

    def _transform(self, data):
        """Return a copy of `data` with every list value's items processed."""
        return {
            key: [self._process_item(item) for item in value]
            if isinstance(value, list) else value
            for key, value in data.items()
        }

    def _process_item(self, item):
        """Convert ISO 8601 timestamp strings inside one list item to datetime.

        Items that are not dicts (strings, numbers, nested lists, ...) are
        returned unchanged.
        """
        if not isinstance(item, dict):
            return item

        processed = {}
        for k, v in item.items():
            # Cheap pre-filter: only strings containing 'T' are tried as
            # timestamps; anything fromisoformat rejects is kept as-is
            if isinstance(v, str) and 'T' in v:
                try:
                    processed[k] = datetime.fromisoformat(v)
                except ValueError:
                    processed[k] = v
            else:
                processed[k] = v
        return processed

    def to_json(self, data, indent=2):
        """Serialize processed data back to JSON.

        datetime values are written as ISO 8601 strings; any other object
        json cannot serialize is written as str(obj).
        """
        def serializer(obj):
            if isinstance(obj, datetime):
                return obj.isoformat()
            return str(obj)

        return json.dumps(data, default=serializer, indent=indent)

# Usage
processor = APIResponseProcessor()
response = '{"status": "success", "data": {"users": [{"name": "Alice", "created": "2024-01-15T10:30:00"}]}}'
result = processor.process_response(response)

Real-World: Config File Handler

import json
from pathlib import Path

class ConfigManager:
    """A JSON-file-backed configuration dict that saves itself on every change.

    The file is read once on construction. `set`, `update` and `merge` modify
    the in-memory dict and immediately rewrite the file.
    """

    def __init__(self, config_path):
        self.config_path = Path(config_path)
        self.config = {}
        self.defaults = {}  # not consulted by any method of this class
        self.load()

    def load(self):
        """Replace the in-memory config with the file's content ({} if absent)."""
        if self.config_path.exists():
            with open(self.config_path, 'r', encoding='utf-8') as f:
                self.config = json.load(f)
        else:
            self.config = {}

    def save(self):
        """Write the config to disk without ever leaving a half-written file.

        The config is serialized in memory first, so a value json cannot
        serialize raises before the existing file is touched. The text is then
        written to a sibling temporary file that replaces the real one in a
        single rename (atomic on POSIX file systems).

        Raises:
            TypeError: If the config holds a value that is not JSON serializable.
        """
        text = json.dumps(self.config, indent=2, sort_keys=True)
        tmp_path = self.config_path.with_name(self.config_path.name + '.tmp')
        tmp_path.write_text(text, encoding='utf-8')
        tmp_path.replace(self.config_path)

    def get(self, key, default=None):
        """Return the value stored under `key`, or `default` if absent."""
        return self.config.get(key, default)

    def set(self, key, value):
        """Store `value` under `key` and save."""
        self.config[key] = value
        self.save()

    def update(self, data):
        """Shallow-update the config from the mapping `data` and save."""
        self.config.update(data)
        self.save()

    def validate(self, schema):
        """Simple schema validation.

        Args:
            schema: Mapping of required key -> expected Python type.

        Returns:
            A list of error messages; empty when the config conforms.
        """
        errors = []
        for key, expected_type in schema.items():
            if key not in self.config:
                errors.append(f"Missing key: {key}")
                continue
            value = self.config[key]
            # bool is a subclass of int in Python, but JSON true/false is not
            # a number, so it must not satisfy an `int` requirement
            bool_as_int = expected_type is int and isinstance(value, bool)
            if bool_as_int or not isinstance(value, expected_type):
                errors.append(f"Invalid type for {key}: expected {expected_type.__name__}")
        return errors

    def merge(self, other_config):
        """Deep merge another config.

        Nested dicts are merged key by key; any other value in
        `other_config` overwrites the existing one. Saves afterwards.
        """
        def deep_merge(base, override):
            for key, value in override.items():
                if key in base and isinstance(base[key], dict) and isinstance(value, dict):
                    deep_merge(base[key], value)
                else:
                    base[key] = value
        deep_merge(self.config, other_config)
        self.save()

# Usage
config = ConfigManager('app.json')
config.set('debug', True)
config.set('database', {'host': 'localhost', 'port': 5432})

errors = config.validate({
    'debug': bool,
    'database': dict
})

Common Mistakes

import json

# Mistake 1: Not handling JSONDecodeError
# json.loads("invalid")  # Raises JSONDecodeError

try:
    data = json.loads("invalid json")
except json.JSONDecodeError as e:
    print(f"Parse error: {e}")

# Mistake 2: Using single quotes
# json.loads("{'key': 'value'}")  # Invalid!

# Fix: JSON requires double quotes around every string and key
data = json.loads('{"key": "value"}')

# Mistake 3: Confusing dump/dumps and load/loads
# dumps = dump to string
# loads = load from string
# dump = dump to file
# load = load from file

# Mistake 4: Forgetting default parameter
class MyObj:
    """Placeholder for any class the json module cannot serialize by itself."""
    pass

# json.dumps({"obj": MyObj()})  # TypeError!
# Fix: pass default= so json.dumps has a converter to fall back on.
# default=str is the bluntest choice (it stringifies anything); prefer a
# function that converts only the types you expect.
json.dumps({"obj": MyObj()}, default=str)

# Mistake 5: Not handling None values
data = {"a": None, "b": "value"}
# Python None is written as JSON null (and null is read back as None)
result = json.dumps(data)
# {"a": null, "b": "value"}

# Mistake 6: Large files without streaming
# Loading entire 1GB JSON file into memory!
# data = json.load(f)  # Uses ~1GB+ memory

# Fix: stream with ijson or process line by line

# Mistake 7: Not using indent for debugging
# Hard to read: {"name":"Alice","age":30}
# Better: json.dumps(data, indent=2)

Key Takeaways

json.dumps() encodes Python objects to JSON strings
json.loads() decodes JSON strings to Python objects
Use indent for pretty-printing
Use a custom encoder class (cls=) or a default= function for types json cannot serialize
json.load() reads the whole document into memory — for large data, stream it with JSON Lines or ijson
Use separators=(',', ':') for compact output
Always handle JSONDecodeError when parsing external data

Python JSON — Serialization & Data Exchange

Python JSON — Serialization & Data Exchange

Learning Objectives

Basic Encoding and Decoding

Type Mapping

File Operations

Custom Serialization

Working with Nested JSON

Advanced Options

JSON Lines (JSONL)

Streaming Large JSON

Real-World: API Response Processing

Real-World: Config File Handler

Common Mistakes

Key Takeaways

Premium Content

Need Expert Python Help?