Python Math & Statistics — Numerical Computing
Python's math and statistics modules provide mathematical functions and statistical calculations for data analysis.
Learning Objectives
- Use math module for advanced mathematical operations
- Calculate descriptive statistics with the statistics module
- Work with decimal and fraction for precision
- Apply mathematical concepts in real-world scenarios
- Build simulations and statistical analyses
Math Module
import math
# Constants
print(math.pi) # 3.141592653589793
print(math.e) # 2.718281828459045
print(math.tau) # 6.283185307179586 (2 * pi)
print(math.inf) # inf (positive infinity)
print(math.nan) # nan (Not a Number)
# Basic operations
print(math.sqrt(16)) # 4.0
print(math.pow(2, 10)) # 1024.0 (always a float, unlike 2 ** 10)
print(math.exp(1)) # 2.718281828459045
print(math.log(100, 10)) # 2.0
print(math.log2(256)) # 8.0
print(math.log10(1000)) # 3.0
print(math.factorial(5)) # 120
print(math.gcd(12, 8)) # 4
print(math.comb(10, 3)) # 120 (combinations)
print(math.perm(10, 3)) # 720 (permutations)
# Accurate floating-point sum
# Ten terms are needed to expose the drift: five copies of 0.1 happen to
# round to exactly 0.5 with plain sum() as well.
print(math.fsum([0.1] * 10)) # 1.0
print(sum([0.1] * 10)) # 0.9999999999999999 (1.0 on Python 3.12+, where sum() also compensates)
# Floor and ceiling
print(math.floor(3.7)) # 3
print(math.ceil(3.2)) # 4
# isclose for float comparison
print(math.isclose(0.1 + 0.2, 0.3)) # True
print(math.isclose(0.1, 0.1000001, rel_tol=1e-6)) # True
# Hypotenuse and atan2
print(math.hypot(3, 4)) # 5.0
print(math.atan2(1, 1)) # 0.7853981633974483 (pi/4)
Math Functions Reference
| Function | Description | Example |
|---|---|---|
| sqrt(x) | Square root | sqrt(16) -> 4.0 |
| pow(x, y) | x raised to y | pow(2, 10) -> 1024.0 |
| log(x[, base]) | Logarithm | log(100, 10) -> 2.0 |
| log2(x) | Base-2 logarithm | log2(256) -> 8.0 |
| log10(x) | Base-10 logarithm | log10(1000) -> 3.0 |
| factorial(n) | n! | factorial(5) -> 120 |
| gcd(a, b) | Greatest common divisor | gcd(12, 8) -> 4 |
| comb(n, k) | Combinations (n choose k) | comb(10, 3) -> 120 |
| perm(n, k) | Permutations | perm(10, 3) -> 720 |
| isclose(a, b) | Float comparison | isclose(0.1+0.2, 0.3) -> True |
Trigonometry
import math
# Degrees to radians
math.radians(180) # pi
math.radians(90) # pi/2
math.degrees(math.pi) # 180.0
math.degrees(math.pi/2) # 90.0
# Trig functions (arguments are in radians)
math.sin(math.pi / 2) # 1.0
math.cos(0) # 1.0
math.tan(math.pi / 4) # 0.9999999999999999 (not exactly 1.0: pi/4 is not exactly representable)
# Inverse trig (results are in radians)
math.asin(1) # pi/2
math.acos(1) # 0.0
math.atan(1) # pi/4
# Hyperbolic
math.sinh(1) # 1.1752011936438014
math.cosh(1) # 1.5430806348152437
math.tanh(1) # 0.7615941559557649
# Practical example: distance between two points
def distance(x1, y1, x2, y2):
    """Return the Euclidean distance between the points (x1, y1) and (x2, y2)."""
    # hypot computes sqrt(dx**2 + dy**2).
    return math.hypot(x2 - x1, y2 - y1)


d = distance(0, 0, 3, 4)
print(f"Distance: {d}") # Distance: 5.0
# Angle between two vectors
def angle_between(x1, y1, x2, y2):
    """Return the signed angle in radians from vector (x1, y1) to vector (x2, y2).

    The result lies in (-pi, pi]; positive means a counter-clockwise turn
    from the first vector to the second. A zero-length vector yields 0.0.
    """
    # atan2(cross, dot) gives the angle between the vectors themselves.
    # atan2(y2 - y1, x2 - x1) would instead give the direction of the segment
    # joining the two points (135 degrees for the example below).
    cross = x1 * y2 - y1 * x2
    dot = x1 * x2 + y1 * y2
    return math.atan2(cross, dot)


angle = angle_between(1, 0, 0, 1)
print(f"Angle: {math.degrees(angle)} degrees") # Angle: 90.0 degrees
Trigonometric Functions Table
| Function | Description | Example |
|---|---|---|
| sin(x) | Sine | sin(pi/2) -> 1.0 |
| cos(x) | Cosine | cos(0) -> 1.0 |
| tan(x) | Tangent | tan(pi/4) -> 0.9999999999999999 (≈ 1.0) |
| asin(x) | Arc sine | asin(1) -> pi/2 |
| acos(x) | Arc cosine | acos(1) -> 0.0 |
| atan(x) | Arc tangent | atan(1) -> pi/4 |
| atan2(y, x) | Two-argument arc tangent | atan2(1, 1) -> pi/4 |
| hypot(x, y) | Hypotenuse | hypot(3, 4) -> 5.0 |
| degrees(x) | Radians to degrees | degrees(pi) -> 180.0 |
| radians(x) | Degrees to radians | radians(180) -> pi |
Statistics Module
import statistics
data = [23, 45, 12, 67, 34, 89, 23, 56]
# Central tendency
print(statistics.mean(data)) # 43.625
print(statistics.median(data)) # 39.5
print(statistics.mode([1, 1, 2, 3])) # 1
print(statistics.multimode([1, 1, 2, 2, 3])) # [1, 2]
# Dispersion (sum of squared deviations from the mean is 4703.875)
print(statistics.stdev(data)) # ≈ 25.92 (sample standard deviation, divides by n - 1)
print(statistics.variance(data)) # ≈ 671.98 (sample variance)
print(statistics.pstdev(data)) # ≈ 24.25 (population standard deviation, divides by n)
print(statistics.pvariance(data)) # 587.984375 (population variance)
# Quantiles (default method='exclusive')
print(statistics.quantiles(data, n=4)) # Quartiles: [23.0, 39.5, 64.25]
print(statistics.quantiles(data, n=10)) # Deciles (9 cut points)
# High-level statistics
print(statistics.mean([1, 2, 3, 4, 5])) # 3 (int inputs with a whole-number mean give an int)
print(statistics.median([1, 2, 3, 4, 5])) # 3
print(statistics.stdev([1, 2, 3, 4, 5])) # ≈ 1.58
# Linear regression
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
# Use numpy for regression: np.polyfit(x, y, 1)
# On Python 3.10+ the standard library also offers
# statistics.linear_regression(x, y) -> slope ≈ 0.6, intercept ≈ 2.2 for this data
Statistics Comparison
| Function | Description | Use When |
|---|---|---|
| mean() | Arithmetic average | Data is numeric, no outliers |
| median() | Middle value | Data has outliers |
| mode() | Most common value | Categorical data |
| stdev() | Sample std deviation | Estimating population parameters |
| pstdev() | Population std deviation | Complete population data |
Decimal for Precision
from decimal import Decimal, getcontext
# Float imprecision
print(0.1 + 0.2) # 0.30000000000000004
# Decimal for exact arithmetic (construct from strings, not floats)
a = Decimal('0.1')
b = Decimal('0.2')
print(a + b) # 0.3
# Set precision
# NOTE: this changes the current thread's context, so it also applies to
# every Decimal operation that follows.
getcontext().prec = 50
result = Decimal(1) / Decimal(3)
print(result)
# 0.33333333333333333333333333333333333333333333333333
# Financial calculations
price = Decimal('19.99')
quantity = Decimal('3')
tax_rate = Decimal('0.08')
subtotal = price * quantity
tax = subtotal * tax_rate
total = subtotal + tax
print(f"Subtotal: ${subtotal}")
print(f"Tax: ${tax}")
print(f"Total: ${total}")
# Subtotal: $59.97
# Tax: $4.7976
# Total: $64.7676
# Rounding to cents
print(total.quantize(Decimal('0.01'))) # 64.77 (no "$" is printed here)
# Decimal vs float comparison
print(Decimal('0.1') + Decimal('0.2') == Decimal('0.3')) # True
print(0.1 + 0.2 == 0.3) # False
Decimal Operations
from decimal import Decimal, getcontext, localcontext
# Set precision
getcontext().prec = 28
# Basic operations
a = Decimal('3.14159')
b = Decimal('2.71828')
print(f"Add: {a + b}") # 5.85987
print(f"Sub: {a - b}") # 0.42331
print(f"Mul: {a * b}") # 8.53972...
print(f"Div: {a / b}") # 1.15572...
# Rounding modes
from decimal import ROUND_HALF_UP, ROUND_DOWN, ROUND_CEILING
x = Decimal('2.5')
print(x.quantize(Decimal('1'), rounding=ROUND_HALF_UP)) # 3
print(x.quantize(Decimal('1'), rounding=ROUND_DOWN)) # 2
print(x.quantize(Decimal('1'), rounding=ROUND_CEILING)) # 3
# Context operations
# A Context object is not itself a context manager; localcontext() returns
# one that installs a temporary copy of the current context and restores
# the previous context (prec = 28 here) when the block exits.
with localcontext() as ctx:
    ctx.prec = 50 # raised only inside this block
    result = Decimal(1) / Decimal(3)
    print(f"High precision: {result}") # 0.333... (50 digits)
Fraction for Exact Ratios
from fractions import Fraction
# Exact fractions
a = Fraction(1, 3)
b = Fraction(1, 6)
print(a + b) # 1/2
print(float(a + b)) # 0.5
print(a * b) # 1/18
# From a float that is exactly representable in binary, or from a decimal string
print(Fraction(0.25)) # 1/4
print(Fraction('0.75')) # 3/4
# From float (exact representation of the stored binary value, not of "0.1")
print(Fraction(0.1)) # 3602879701896397/36028797018963968
# Arithmetic stays exact
result = Fraction(1, 3) * 3
print(result) # 1 (exact, not 0.9999999)
# Practical: probability calculations
events = Fraction(1, 6) # Single die roll
two_dice = events * events # Both show specific number
print(f"Probability: {two_dice}") # 1/36
# Convert to percentage
print(f"Percentage: {float(two_dice) * 100:.2f}%") # 2.78%
Fraction Operations
from fractions import Fraction
# Basic operations
a = Fraction(1, 2)
b = Fraction(1, 3)
print(f"Add: {a + b}") # 5/6
print(f"Sub: {a - b}") # 1/6
print(f"Mul: {a * b}") # 1/6
print(f"Div: {a / b}") # 3/2
print(f"Pow: {a ** 2}") # 1/4
# Comparison
print(f"Equal: {a == Fraction(2, 4)}") # True
print(f"Less: {b < a}") # True
# Conversion
print(f"Decimal: {float(a)}") # 0.5
# The label previously read "Integer", but the value shown is the numerator.
print(f"Numerator: {a.numerator}") # 1
print(f"Denominator: {a.denominator}") # 2
# Simplify (fractions are reduced to lowest terms on construction)
f = Fraction(4, 8)
print(f"Simplified: {f}") # 1/2
# From float
f = Fraction(0.1)
print(f"From float: {f}") # 3602879701896397/36028797018963968
random Module
import random
# Basic random
print(random.random()) # Random float [0, 1)
print(random.uniform(1, 10)) # Random float [1, 10] (the upper end may or may not be reachable due to rounding)
print(random.randint(1, 10)) # Random integer [1, 10], both ends included
print(random.randrange(0, 10, 2)) # Random even number [0, 10): one of 0, 2, 4, 6, 8
# Sequence operations
items = ['a', 'b', 'c', 'd', 'e']
print(random.choice(items)) # Random element
print(random.sample(items, 3)) # 3 unique random elements
random.shuffle(items) # Shuffle in place (returns None)
# Seeding for reproducibility
random.seed(42)
print(random.randint(1, 100)) # Same value on every run with this seed
# Distribution
print(random.gauss(0, 1)) # Gaussian distribution (mean 0, standard deviation 1)
print(random.expovariate(1)) # Exponential distribution (rate 1)
# Practical: simulate dice rolls
def simulate_dice_rolls(n):
    """Roll a fair six-sided die n times.

    Returns a collections.Counter mapping each face that came up to the
    number of times it was rolled. Faces that never appear are absent.
    """
    # Local import keeps this snippet self-contained.
    from collections import Counter
    return Counter(random.randint(1, 6) for _ in range(n))


n_rolls = 10000
results = simulate_dice_rolls(n_rolls)
for face, count in sorted(results.items()):
    # Derive the percentage from n_rolls instead of hard-coding count/100,
    # so the output stays correct if the number of rolls changes.
    print(f"Face {face}: {count} ({100 * count / n_rolls:.1f}%)")
random Functions Reference
| Function | Description | Example |
|---|---|---|
| random() | Random float [0, 1) | random() -> 0.713 |
| uniform(a, b) | Random float [a, b] | uniform(1, 10) -> 5.23 |
| randint(a, b) | Random integer [a, b] | randint(1, 10) -> 5 |
| randrange(start, stop[, step]) | Random integer in range | randrange(0, 10, 2) -> 4 |
| choice(seq) | Random element from sequence | choice([1, 2, 3]) -> 2 |
| sample(population, k) | k unique random elements | sample([1, 2, 3], 2) -> [1, 3] |
| shuffle(x) | Shuffle sequence in place (returns None) | x = [1, 2, 3]; shuffle(x); x -> [3, 1, 2] |
| seed(a) | Initialize random state | seed(42) |
| gauss(mu, sigma) | Gaussian distribution | gauss(0, 1) -> 0.23 |
| expovariate(lambd) | Exponential distribution | expovariate(1) -> 1.5 |
| betavariate(alpha, beta) | Beta distribution | betavariate(2, 5) -> 0.35 |
| gammavariate(alpha, beta) | Gamma distribution | gammavariate(2, 1) -> 1.2 |
| triangular(low, high, mode) | Triangular distribution | triangular(0, 10, 5) -> 4.5 |
Real-World: Statistical Analysis
import statistics
import math
def analyze_dataset(data):
    """Comprehensive statistical analysis.

    Args:
        data: a sequence of numbers with at least two elements.

    Returns:
        A dict with the keys count, mean, median, mode, stdev, variance,
        cv (coefficient of variation in percent, 0 when the mean is 0),
        min, max, range, q1, q2, q3 and outliers (values whose z-score
        exceeds 2 in absolute value).

    Raises:
        statistics.StatisticsError: if data has fewer than two elements
            (raised by the sample statistics and quantile functions).
    """
    n = len(data)
    mean = statistics.mean(data)
    median = statistics.median(data)
    mode = statistics.mode(data)
    stdev = statistics.stdev(data)
    variance = statistics.variance(data)
    lowest, highest = min(data), max(data)
    # Coefficient of variation
    cv = (stdev / mean) * 100 if mean != 0 else 0
    # Z-scores; constant data has stdev == 0, so every point sits on the
    # mean and gets a z-score of 0 instead of raising ZeroDivisionError.
    if stdev:
        z_scores = [(x - mean) / stdev for x in data]
    else:
        z_scores = [0.0] * n
    # Outliers (beyond 2 standard deviations)
    outliers = [x for x, z in zip(data, z_scores) if abs(z) > 2]
    # Quartiles
    q1, q2, q3 = statistics.quantiles(data, n=4)
    return {
        "count": n,
        "mean": mean,
        "median": median,
        "mode": mode,
        "stdev": stdev,
        "variance": variance,
        "cv": cv,
        "min": lowest,
        "max": highest,
        "range": highest - lowest,
        "q1": q1,
        "q2": q2,
        "q3": q3,
        "outliers": outliers,
    }


# Usage
scores = [85, 90, 78, 92, 88, 76, 95, 89, 84, 91,
          87, 82, 90, 86, 93, 80, 88, 94, 77, 85]
results = analyze_dataset(scores)
for key, value in results.items():
    # Floats are shown with two decimals; ints and lists are printed as-is.
    print(f"{key}: {value:.2f}" if isinstance(value, float) else f"{key}: {value}")
Real-World: Monte Carlo Simulation
import random
import math
def estimate_pi(num_samples):
    """Estimate pi using Monte Carlo method.

    Draws num_samples points uniformly from the square [-1, 1] x [-1, 1]
    and counts how many fall inside the unit circle. The hit ratio
    approximates (circle area) / (square area) = pi / 4.

    Raises:
        ZeroDivisionError: if num_samples is 0.
    """
    inside_circle = 0
    for _ in range(num_samples):
        x = random.uniform(-1, 1)
        y = random.uniform(-1, 1)
        if x**2 + y**2 <= 1:
            inside_circle += 1
    return 4 * inside_circle / num_samples
# Run simulation: ten independent estimates, then their average
estimates = []
for _ in range(10):
    pi_est = estimate_pi(100000)
    estimates.append(pi_est)
    print(f"Estimate: {pi_est:.6f}")
print(f"\nAverage: {sum(estimates)/len(estimates):.6f}")
print(f"Actual: {math.pi:.6f}")
# Dice roll simulation
def simulate_games(n_games, n_dice, threshold=15):
    """Simulate dice games and calculate win probability.

    Each game rolls n_dice fair six-sided dice; the game is won when the
    total is at least threshold.

    Args:
        n_games: number of games to simulate (must be non-zero).
        n_dice: number of dice rolled per game.
        threshold: minimum total that counts as a win (default 15).

    Returns:
        The fraction of simulated games won, between 0.0 and 1.0.

    Raises:
        ZeroDivisionError: if n_games is 0.
    """
    wins = 0
    for _ in range(n_games):
        total = sum(random.randint(1, 6) for _ in range(n_dice))
        if total >= threshold:
            wins += 1
    return wins / n_games
# For comparison, the exact value: 20 of the 216 equally likely outcomes of
# three dice total 15 or more, i.e. 20/216 ≈ 0.0926.
prob = simulate_games(100000, 3)
print(f"\nWin probability (3 dice, total >= 15): {prob:.4f}")
Common Mistakes
import math
import random
import statistics
from decimal import Decimal
# Mistake 1: Float comparison without tolerance
# 0.1 + 0.2 == 0.3 # False!
print(math.isclose(0.1 + 0.2, 0.3)) # True
# Mistake 2: Using mean with outliers
data = [1, 2, 3, 4, 1000]
print(statistics.mean(data)) # 202 — skewed!
print(statistics.median(data)) # 3 — robust
# Mistake 3: Population vs sample statistics
# Use stdev() when working with a sample
# Use pstdev() when working with the entire population
# Mistake 4: Integer division confusion
print(7 / 2) # 3.5 (float division)
print(7 // 2) # 3 (integer division)
# Mistake 5: Decimal vs float for money
# BAD: total = 0.1 + 0.2
total = Decimal('0.1') + Decimal('0.2') # Correct
# Mistake 6: Forgetting math.isclose for comparisons
a = 0.1 + 0.2
b = 0.3
# if a == b: # Unreliable!
if math.isclose(a, b): # Reliable
    print("Equal")
# Mistake 7: Using random without seeding
# Each run gives different results
# (random must be imported for this snippet to run on its own)
random.seed(42) # Reproducible results
Key Takeaways
- Use math for advanced math functions and constants
- Use statistics for quick descriptive statistics
- Use Decimal for financial/precision calculations
- Use Fraction for exact rational arithmetic
- Use math.fsum() for accurate floating-point sums
- Use math.isclose() for safe float comparison
- Use random.seed() for reproducible simulations
- Use statistics.median() for outlier-resistant central tendency
- Use statistics.stdev() for sample data, pstdev() for population
- Use random.gauss() for normally distributed random numbers