Python JSON — Serialization & Data Exchange
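JSON (JavaScript Object Notation) is a lightweight, text-based data interchange format and the de facto standard for web APIs and configuration files. Python's built-in json module handles encoding (Python objects to JSON text) and decoding (JSON text to Python objects).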
Learning Objectives
- Serialize Python objects to JSON strings
- Deserialize JSON to Python objects
- Customize JSON serialization for complex types
- Handle large JSON files with streaming
- Work with nested JSON structures efficiently
- Use high-performance JSON libraries
Basic Encoding and Decoding
import json
# Python dict to JSON string
data = {
    "name": "Alice",
    "age": 30,
    "scores": [95, 87, 91],
    "active": True,
    "address": None,
}
json_string = json.dumps(data, indent=2)
print(json_string)
# With indent=2 every element of every container goes on its own line,
# including the items of the "scores" list:
# {
#   "name": "Alice",
#   "age": 30,
#   "scores": [
#     95,
#     87,
#     91
#   ],
#   "active": true,
#   "address": null
# }

# JSON string to Python dict
parsed = json.loads(json_string)
print(parsed["name"])  # Alice
print(type(parsed["age"]))  # <class 'int'>
print(type(parsed["active"]))  # <class 'bool'>
Type Mapping
| JSON Type | Python Type | Example |
|---|---|---|
| object | dict | {"a": 1} -> {"a": 1} |
| array | list | [1, 2] -> [1, 2] |
| string | str | "hello" -> "hello" |
| number (int) | int | 42 -> 42 |
| number (float) | float | 3.14 -> 3.14 |
| true | True | true -> True |
| false | False | false -> False |
| null | None | null -> None |
File Operations
import json
# Write JSON to file. JSON text is exchanged as UTF-8, so the encoding is
# given explicitly instead of relying on the platform's locale default.
data = {"users": [{"name": "Alice"}, {"name": "Bob"}]}
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=2)

# Read JSON from file
with open('data.json', 'r', encoding='utf-8') as f:
    loaded = json.load(f)

# Write compact JSON (no whitespace after the item and key separators)
with open('data_compact.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, separators=(',', ':'))
# Handle JSONL (JSON Lines) files
def write_jsonl(filename, records):
    """Write *records* to *filename* in JSON Lines format.

    Each record is serialized with ``json.dumps`` and written on its own
    line. The file is created or truncated and is always encoded as UTF-8,
    so the result does not depend on the platform's default encoding.

    Args:
        filename: Path of the file to write.
        records: Iterable of JSON-serializable objects.

    Raises:
        TypeError: If a record is not JSON serializable.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        # writelines consumes the generator lazily, one record at a time.
        f.writelines(json.dumps(record) + '\n' for record in records)
def read_jsonl(filename):
    """Lazily yield one decoded object per non-blank line of a JSON Lines file.

    The file is decoded as UTF-8 regardless of the platform's locale, so
    non-ASCII text written by other tools is read correctly.

    Args:
        filename: Path of the JSONL file to read.

    Yields:
        The Python object decoded from each non-blank line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank and whitespace-only lines are skipped.
            if line.strip():
                yield json.loads(line)
Custom Serialization
import json
from datetime import datetime, date
from decimal import Decimal
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also handles common non-JSON Python types.

    Conversions applied by :meth:`default`:

    * ``datetime`` / ``date``  -> ISO 8601 string (``isoformat()``)
    * ``Decimal``              -> ``float`` (may lose precision)
    * ``set`` / ``frozenset``  -> list, sorted when the elements are orderable
    * ``bytes`` / ``bytearray``-> text decoded as UTF-8, invalid bytes replaced
    * objects with ``__dict__``-> their attribute dictionary
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Raises:
            TypeError: (from the base class) if *obj* is none of the
                supported types.
        """
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, (set, frozenset)):
            # Sets have no stable order; sort so the output is reproducible
            # between runs, falling back to arbitrary order for elements
            # that cannot be compared with each other.
            try:
                return sorted(obj)
            except TypeError:
                return list(obj)
        if isinstance(obj, (bytes, bytearray)):
            return obj.decode('utf-8', errors='replace')
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        return super().default(obj)
# Sample payload mixing plain JSON values with a datetime, a set and a Decimal
data = {
    "event": "Conference",
    "date": datetime.now(),
    "tags": {"python", "tech"},
    "price": Decimal("29.99"),
    "attendees": 500,
}
# Instantiating the encoder directly is what json.dumps(..., cls=...) does
json_str = CustomEncoder(indent=2).encode(data)
print(json_str)
# Simple default function
def default_handler(obj):
    """Convert *obj* to a JSON-serializable value for ``json.dumps(default=...)``.

    Handles the same non-JSON types that appear in the sample payload:
    ``datetime``/``date`` become ISO 8601 strings, ``Decimal`` becomes a
    ``float`` (which may lose precision) and ``set`` becomes a list.

    Raises:
        TypeError: If *obj* is of any other type.
    """
    # datetime is a subclass of date, so this covers both.
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, set):
        return list(obj)
    raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
# A plain function passed as default= replaces the need for an encoder subclass
json_str = json.JSONEncoder(default=default_handler, indent=2).encode(data)
Working with Nested JSON
import json
# Access nested data safely
# Sample API payload: a status flag plus a list of users, each with contacts
api_response = {
    "status": "success",
    "data": {
        "users": [
            {"name": name, "contacts": {"email": f"{name.lower()}@example.com"}}
            for name in ("Alice", "Bob")
        ],
    },
}
# Safe nested access
def safe_get(data, *keys, default=None):
    """Follow *keys* through nested dictionaries without raising.

    Args:
        data: The outermost dictionary.
        *keys: Keys to look up, outermost first.
        default: Value returned when the path cannot be followed.

    Returns:
        The value found at the end of the path, or *default* as soon as a
        key is missing or a non-dict value is reached before the last key.
        With no keys, *data* itself is returned.
    """
    # A private sentinel distinguishes "key absent" from any stored value,
    # so the lookup never continues inside the caller's default.
    missing = object()
    current = data
    for key in keys:
        if not isinstance(current, dict):
            return default
        current = current.get(key, missing)
        if current is missing:
            return default
    return current
# Collect every user's e-mail; a missing "users" list yields no entries
emails = list(map(
    lambda user: safe_get(user, "contacts", "email"),
    safe_get(api_response, "data", "users", default=[]),
))
print(emails)  # ['alice@example.com', 'bob@example.com']
# Flatten nested JSON
def flatten_json(data, prefix=''):
    """Flatten a nested JSON object into a single-level dict with dotted keys.

    Dictionary keys and list indexes are joined with ``.``; lists are
    flattened at any depth, including lists nested inside lists. Empty
    dicts and empty lists are kept as values so no key disappears.

    Args:
        data: The dictionary to flatten.
        prefix: Key path prepended to every produced key.

    Returns:
        A new flat dictionary mapping dotted paths to leaf values.
    """
    items = {}

    def visit(value, path):
        # Non-empty containers are descended into; everything else
        # (scalars and empty containers) is stored as a leaf.
        if isinstance(value, dict) and value:
            children = value.items()
        elif isinstance(value, list) and value:
            children = enumerate(value)
        else:
            items[path] = value
            return
        for name, child in children:
            visit(child, f"{path}.{name}")

    for key, value in data.items():
        visit(value, f"{prefix}.{key}" if prefix else key)
    return items
# Two levels of nested objects plus a list of scalars
nested = dict(a=dict(b=1, c=dict(d=2)), e=[3, 4, 5])
flat = flatten_json(nested)
print(flat)
# {'a.b': 1, 'a.c.d': 2, 'e.0': 3, 'e.1': 4, 'e.2': 5}
Advanced Options
import json
data = dict(name="Alice", age=30, city="NYC")

# Compact output: drop the spaces json.dumps normally puts after ',' and ':'
compact = json.dumps(data, separators=(",", ":"))
# '{"name":"Alice","age":30,"city":"NYC"}'

# Keys emitted in sorted order, pretty-printed with a two-space indent
sorted_json = json.dumps(data, indent=2, sort_keys=True)
# {
#   "age": 30,
#   "city": "NYC",
#   "name": "Alice"
# }
# Handle non-serializable objects
from datetime import date


def handle_non_serializable(obj):
    """Fallback for ``json.dumps(default=...)``: return ``str(obj)``.

    ``json.dumps`` only calls this for objects it cannot serialize itself.
    """
    return str(obj)


# A date object is not natively serializable, so dumps() routes it through
# the hook (a plain string would never reach it).
json.dumps({"date": date(2024, 1, 15)}, default=handle_non_serializable)
# '{"date": "2024-01-15"}'
# Parse with object_hook for custom decoding
def as_complex(dct):
    """``object_hook`` that turns tagged dictionaries into complex numbers.

    A dictionary containing the key ``'__complex__'`` is replaced by
    ``complex(dct['real'], dct['imag'])``; any other dictionary is
    returned unchanged.
    """
    if '__complex__' not in dct:
        return dct
    real_part, imag_part = dct['real'], dct['imag']
    return complex(real_part, imag_part)
# object_pairs_hook receives each object's (key, value) pairs in document
# order; passing dict builds an ordinary dict from those pairs
ordered = json.JSONDecoder(object_pairs_hook=dict).decode('{"b": 2, "a": 1}')
# Check if string is valid JSON
def is_valid_json(string):
    """Return True if *string* parses as standards-compliant JSON.

    Besides malformed text and non-text input, this rejects the constants
    ``NaN``, ``Infinity`` and ``-Infinity``: Python's decoder accepts them
    as an extension, but they are not part of the JSON grammar.

    Args:
        string: The ``str``/``bytes``/``bytearray`` candidate to check.

    Returns:
        ``True`` when parsing succeeds, ``False`` otherwise.
    """
    def reject_constant(name):
        # json.loads calls this hook only for NaN / Infinity / -Infinity.
        raise ValueError(f"{name} is not valid JSON")

    try:
        json.loads(string, parse_constant=reject_constant)
    except (ValueError, TypeError, RecursionError):
        # JSONDecodeError is a ValueError; TypeError covers non-text input;
        # RecursionError covers pathologically deep nesting.
        return False
    return True
# Prints True for the well-formed sample, then False for the malformed one
print(*map(is_valid_json, ('{"valid": true}', 'not json')), sep='\n')
JSON Lines (JSONL)
import json
# Write JSONL — one JSON object per line
def write_jsonl(filename, records):
    """Write *records* to *filename* as JSON Lines, one object per line.

    The file is created or truncated and always encoded as UTF-8, so the
    output does not depend on the platform's default encoding.

    Args:
        filename: Path of the file to write.
        records: Iterable of JSON-serializable objects.

    Raises:
        TypeError: If a record is not JSON serializable.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        for record in records:
            f.write(json.dumps(record) + '\n')
# Read JSONL — memory efficient for large files
def read_jsonl(filename):
    """Lazily yield the objects stored in a JSON Lines file, skipping bad lines.

    The file is decoded as UTF-8 regardless of the platform's locale.
    Blank lines are ignored. A line that is not valid JSON is reported
    with ``print`` (including its 1-based line number) and skipped, so one
    corrupt record does not abort the whole read.

    Args:
        filename: Path of the JSONL file to read.

    Yields:
        The Python object decoded from each valid, non-blank line.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                yield json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Error on line {line_num}: {e}")
# Process large JSONL file
def filter_jsonl(input_file, output_file, predicate):
    """Copy the records of a JSONL file that satisfy *predicate*.

    The input is processed one line at a time. Both files are handled as
    UTF-8 regardless of the platform's locale. Kept records are
    re-serialized with ``json.dumps``, one per line.

    Args:
        input_file: Path of the JSONL file to read.
        output_file: Path of the JSONL file to create or truncate.
        predicate: Callable taking a decoded record; the record is written
            when it returns a true value.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    with open(input_file, 'r', encoding='utf-8') as fin, \
            open(output_file, 'w', encoding='utf-8') as fout:
        for line in fin:
            if not line.strip():
                continue
            record = json.loads(line)
            if predicate(record):
                fout.write(json.dumps(record) + '\n')
# Convert JSONL to CSV
import csv
def jsonl_to_csv(jsonl_file, csv_file):
    """Convert a JSON Lines file of flat objects into a CSV file.

    The header is the union of the keys of all records, in first-seen
    order, so records with differing keys are supported; a record missing
    a column gets an empty cell. Both files are handled as UTF-8. All
    records are held in memory while converting. When the input has no
    records, an empty CSV file is produced.

    Args:
        jsonl_file: Path of the JSONL file to read.
        csv_file: Path of the CSV file to create or truncate.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    with open(jsonl_file, encoding='utf-8') as fin:
        records = [json.loads(line) for line in fin if line.strip()]

    # dict.fromkeys de-duplicates while keeping first-seen order.
    fieldnames = list(dict.fromkeys(key for record in records for key in record))

    with open(csv_file, 'w', newline='', encoding='utf-8') as fout:
        if records:
            writer = csv.DictWriter(fout, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(records)
Streaming Large JSON
import json
# Write large JSON arrays incrementally
def write_large_json(filename, items):
    """Write *items* to *filename* as a JSON array, one element per line.

    Elements are serialized and written one at a time, so *items* may be
    any iterable, including a generator whose length is not known in
    advance. The file is encoded as UTF-8.

    Args:
        filename: Path of the file to create or truncate.
        items: Iterable of JSON-serializable objects.

    Raises:
        TypeError: If an item is not JSON serializable.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write('[\n')
        for i, item in enumerate(items):
            # Emit the separator before every element except the first;
            # this needs no len() and therefore works for iterators too.
            if i:
                f.write(',\n')
            json.dump(item, f)
        f.write('\n]')
# Read large JSON files with ijson (third-party)
# pip install ijson
# import ijson
# with open('large.json', 'rb') as f:
# for record in ijson.items(f, 'item'):
# process(record)
# Streaming JSON parser for arrays
def stream_json_array(filename, chunk_size=65536):
    """Yield the elements of a JSON array without loading the entire file.

    The file is read in chunks of *chunk_size* characters and each element
    is decoded with ``JSONDecoder.raw_decode`` as soon as enough text is
    buffered, so memory use is bounded by the chunk size plus the largest
    single element. Parsing starts at the first ``[`` in the file and ends
    at the matching ``]``. Elements of any JSON type are yielded.

    Parsing is best-effort: if the remaining text cannot be decoded, the
    generator simply stops.

    Args:
        filename: Path of a UTF-8 encoded file containing a JSON array.
        chunk_size: Number of characters to read per I/O call.

    Yields:
        Each decoded array element, in file order.

    Raises:
        ValueError: If the file contains no ``[`` at all.
    """
    decoder = json.JSONDecoder()
    with open(filename, 'r', encoding='utf-8') as f:
        # Locate the opening bracket; text before it can be discarded.
        buffer = ''
        while True:
            start = buffer.find('[')
            if start != -1:
                break
            buffer = f.read(chunk_size)
            if not buffer:
                raise ValueError("no JSON array found in file")
        buffer = buffer[start + 1:]

        pos = 0
        eof = False
        while True:
            # Skip whitespace and element separators before the next value.
            while pos < len(buffer) and buffer[pos] in ' \t\r\n,':
                pos += 1

            if pos < len(buffer):
                if buffer[pos] == ']':
                    return
                try:
                    value, end = decoder.raw_decode(buffer, pos)
                except json.JSONDecodeError:
                    # Either the element is still incomplete (read more
                    # below) or, at end of file, the input is malformed.
                    if eof:
                        return
                else:
                    # Only trust the value once its terminator is visible:
                    # a number such as "12" or "1." may continue in the
                    # next chunk.
                    tail = end
                    while tail < len(buffer) and buffer[tail] in ' \t\r\n':
                        tail += 1
                    terminated = tail < len(buffer) and buffer[tail] in ',]'
                    if terminated or eof:
                        yield value
                        pos = end
                        continue
            elif eof:
                return

            chunk = f.read(chunk_size)
            if chunk:
                # Drop the consumed prefix so the buffer stays small.
                buffer = buffer[pos:] + chunk
                pos = 0
            else:
                eof = True
Real-World: API Response Processing
import json
from datetime import datetime
class APIResponseProcessor:
    """Parse JSON API responses shaped like ``{"status": ..., "data": {...}}``.

    Failures are not raised; they are appended to ``errors`` as messages
    and the processing method returns ``None``.
    """

    def __init__(self):
        self.cache = {}
        # Human-readable messages for every response that could not be used.
        self.errors = []

    def process_response(self, response_text):
        """Decode *response_text* and return its transformed ``data`` section.

        Returns:
            The transformed ``data`` dictionary (empty when ``data`` is
            absent or null), or ``None`` when the text is not valid JSON,
            the root or ``data`` is not an object, or ``status`` is not
            ``'success'``. Every ``None`` result records a message in
            ``self.errors``.
        """
        try:
            data = json.loads(response_text)
        except json.JSONDecodeError as e:
            self.errors.append(f"Parse error: {e}")
            return None

        # Valid JSON can still be a list, string or number; only an object
        # carries the status/data envelope.
        if not isinstance(data, dict):
            self.errors.append(
                f"API error: expected a JSON object, got {type(data).__name__}"
            )
            return None

        if data.get('status') != 'success':
            self.errors.append(f"API error: {data.get('message', 'Unknown')}")
            return None

        payload = data.get('data', {})
        if payload is None:
            payload = {}
        if not isinstance(payload, dict):
            self.errors.append(
                f"API error: expected 'data' to be an object, got {type(payload).__name__}"
            )
            return None
        return self._transform(payload)

    def _transform(self, data):
        """Return a copy of *data* with every list value processed item by item."""
        transformed = {}
        for key, value in data.items():
            if isinstance(value, list):
                transformed[key] = [self._process_item(item) for item in value]
            else:
                transformed[key] = value
        return transformed

    def _process_item(self, item):
        """Convert ISO 8601 timestamp strings inside a record to ``datetime``.

        Items that are not dictionaries (for example the strings in a list
        of tags) are returned unchanged.
        """
        if not isinstance(item, dict):
            return item
        processed = {}
        for k, v in item.items():
            # Cheap pre-filter: only strings containing 'T' are tried as
            # timestamps; anything fromisoformat rejects is kept as-is.
            if isinstance(v, str) and 'T' in v:
                try:
                    processed[k] = datetime.fromisoformat(v)
                except ValueError:
                    processed[k] = v
            else:
                processed[k] = v
        return processed

    def to_json(self, data, indent=2):
        """Serialize processed data back to JSON.

        ``datetime`` values are written in ISO 8601 form; any other
        non-JSON type is written as ``str(obj)``.
        """
        def serializer(obj):
            if isinstance(obj, datetime):
                return obj.isoformat()
            return str(obj)

        return json.dumps(data, default=serializer, indent=indent)
# Usage
processor = APIResponseProcessor()
# A successful response whose single user carries an ISO 8601 timestamp
response = (
    '{"status": "success", "data": {"users": '
    '[{"name": "Alice", "created": "2024-01-15T10:30:00"}]}}'
)
result = processor.process_response(response)
Real-World: Config File Handler
import json
from pathlib import Path
class ConfigManager:
    """Dictionary-style configuration persisted to a UTF-8 JSON file.

    The file is read once on construction; every mutating method
    (``set``, ``update``, ``merge``) writes the whole configuration back.
    """

    def __init__(self, config_path):
        self.config_path = Path(config_path)
        self.config = {}
        self.defaults = {}
        self.load()

    def load(self):
        """Replace ``self.config`` with the file's content, or ``{}`` if absent.

        Raises:
            json.JSONDecodeError: If the file exists but is not valid JSON.
        """
        if self.config_path.exists():
            with open(self.config_path, 'r', encoding='utf-8') as f:
                self.config = json.load(f)
        else:
            self.config = {}

    def save(self):
        """Write the configuration to disk, pretty-printed with sorted keys.

        Raises:
            TypeError: If the configuration holds a non-serializable value;
                the existing file is left untouched in that case.
        """
        # Serialize before opening the file: opening with 'w' truncates it,
        # so a serialization error afterwards would destroy the old config.
        text = json.dumps(self.config, indent=2, sort_keys=True)
        with open(self.config_path, 'w', encoding='utf-8') as f:
            f.write(text)

    def get(self, key, default=None):
        """Return the value stored under *key*, or *default* if missing."""
        return self.config.get(key, default)

    def set(self, key, value):
        """Store *value* under *key* and save."""
        self.config[key] = value
        self.save()

    def update(self, data):
        """Shallow-update the configuration from *data* and save."""
        self.config.update(data)
        self.save()

    def validate(self, schema):
        """Simple schema validation.

        Args:
            schema: Mapping of required key to expected type.

        Returns:
            A list of error messages, empty when the configuration
            contains every key with a value of the expected type.
        """
        errors = []
        for key, expected_type in schema.items():
            if key not in self.config:
                errors.append(f"Missing key: {key}")
                continue
            value = self.config[key]
            # bool is a subclass of int in Python, but JSON true/false is
            # not a number, so a boolean must not satisfy an int field.
            bool_as_int = isinstance(value, bool) and expected_type is int
            if bool_as_int or not isinstance(value, expected_type):
                type_name = getattr(expected_type, '__name__', expected_type)
                errors.append(f"Invalid type for {key}: expected {type_name}")
        return errors

    def merge(self, other_config):
        """Deep merge another config.

        Nested dictionaries present on both sides are merged recursively;
        any other value from *other_config* overwrites the existing one.
        The result is saved.
        """
        def deep_merge(base, override):
            for key, value in override.items():
                if key in base and isinstance(base[key], dict) and isinstance(value, dict):
                    deep_merge(base[key], value)
                else:
                    base[key] = value

        deep_merge(self.config, other_config)
        self.save()
# Usage
config = ConfigManager('app.json')
# Each set() call writes the configuration back to app.json
config.set('debug', True)
config.set('database', dict(host='localhost', port=5432))
# validate() returns a list of problems; an empty list means the schema holds
errors = config.validate(dict(debug=bool, database=dict))
Common Mistakes
import json
# Mistake 1: Not handling JSONDecodeError
# json.loads("invalid")  # Raises JSONDecodeError
try:
    data = json.loads("invalid json")
except json.JSONDecodeError as e:
    print(f"Parse error: {e}")

# Mistake 2: Using single quotes
# json.loads("{'key': 'value'}")  # Invalid! JSON strings require double quotes
# Fix: use double quotes or load from file
data = json.loads('{"key": "value"}')

# Mistake 3: Confusing dump/dumps and load/loads
# dumps = dump to string
# loads = load from string
# dump = dump to file
# load = load from file

# Mistake 4: Forgetting default parameter
class MyObj:
    pass


# json.dumps({"obj": MyObj()})  # TypeError!
# With default=str the unsupported object is written as its str() form
json.dumps({"obj": MyObj()}, default=str)

# Mistake 5: Not handling None values
data = {"a": None, "b": "value"}
# Python None becomes JSON null (and null becomes None again when loading)
result = json.dumps(data)
# {"a": null, "b": "value"}

# Mistake 6: Large files without streaming
# Loading entire 1GB JSON file into memory!
# data = json.load(f)  # Uses ~1GB+ memory
# Fix: stream with ijson or process line by line

# Mistake 7: Not using indent for debugging
# Hard to read: {"name":"Alice","age":30}
# Better: json.dumps(data, indent=2)
Key Takeaways
- json.dumps() encodes Python objects to JSON strings
- json.loads() decodes JSON strings to Python objects
- Use indent for pretty-printing
- Custom encoder for non-standard types
- JSON is text — use streaming for large data
- Use separators=(',', ':') for compact output
- Always handle JSONDecodeError when parsing external data