5 min read
NumPy for AI/ML
Why NumPy?
Pure Python loops are ~100x slower than NumPy vectorized operations. All ML frameworks (PyTorch, TensorFlow, scikit-learn) use NumPy arrays or tensors built on the same concepts.
python
import numpy as np
# Pure Python — slow
result = [x**2 for x in range(1_000_000)] # ~0.3s
# NumPy — fast (C under the hood)
result = np.arange(1_000_000) ** 2 # ~0.003s (100x faster)
Array Creation
python
# From Python
a = np.array([1, 2, 3]) # 1D: shape (3,)
m = np.array([[1, 2], [3, 4]]) # 2D: shape (2, 2)
t = np.array([[[1,2],[3,4]],[[5,6],[7,8]]]) # 3D
# Built-in creators
np.zeros((3, 4)) # 3×4 zeros (float64 by default)
np.ones((2, 3), dtype=int) # 2×3 ones as ints
np.eye(3) # 3×3 identity matrix
np.full((2, 3), 7) # 2×3 filled with 7
np.empty((2, 2)) # uninitialized (fast; contents are arbitrary leftover memory, not random numbers)
np.arange(0, 10, 2) # [0, 2, 4, 6, 8]
np.linspace(0, 1, 5) # [0.0, 0.25, 0.5, 0.75, 1.0]
# Random
np.random.seed(42)
np.random.rand(3, 4) # uniform [0, 1)
np.random.randn(3, 4) # standard normal (mean=0, std=1)
np.random.randint(0, 10, size=(3,)) # three ints in [0, 10), e.g. [7, 2, 5]
np.random.choice([1,2,3,4], size=5, replace=True)
np.random.shuffle(arr) # in-place shuffle of an existing array `arr` (returns None)
# AI-specific patterns
embeddings = np.random.randn(100, 1536) # 100 embeddings of dim 1536
batch = np.zeros((32, 224, 224, 3)) # batch of 32 RGB images 224×224
Shape & Reshaping
python
a = np.arange(24)
a.shape # (24,)
a.ndim # 1
a.size # 24 (total elements)
a.dtype # dtype('int64') on most platforms (int32 on Windows with NumPy < 2)
# Reshape — total elements must match
a.reshape(4, 6) # shape (4, 6)
a.reshape(2, 3, 4) # shape (2, 3, 4)
a.reshape(-1, 6) # -1 = "figure it out" → shape (4, 6)
a.reshape(4, -1) # shape (4, 6)
# Flatten
m.flatten() # always returns a copy
m.ravel() # returns view when possible (faster)
# Add/remove dimensions
a = np.array([1, 2, 3]) # shape (3,)
np.expand_dims(a, axis=0) # shape (1, 3) — add batch dim
np.expand_dims(a, axis=1) # shape (3, 1)
a[np.newaxis, :] # shape (1, 3)
a[:, np.newaxis] # shape (3, 1)
m = np.ones((1, 3, 1))
m.squeeze() # shape (3,) — remove size-1 dims
m.squeeze(axis=0) # shape (3, 1)
# Transpose
m = np.random.randn(3, 4)
m.T # shape (4, 3)
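m.transpose(1, 0) # same as .T for 2D
Broadcasting — The Key Concept
NumPy automatically stretches the size-1 (or missing) dimensions of the smaller array to match the larger one, without copying data:
python
# Rules: align shapes from the right; each pair of dimensions must be equal or one of them must be 1 (missing leading dimensions count as 1)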
a = np.array([[1, 2, 3], # shape (2, 3)
[4, 5, 6]])
b = np.array([10, 20, 30]) # shape (3,) → broadcast to (2, 3)
a + b # [[11, 22, 33], [14, 25, 36]]
# Practical AI use case: normalize embeddings
embeddings = np.random.randn(100, 1536) # (100, 1536)
mean = embeddings.mean(axis=0) # (1536,)
std = embeddings.std(axis=0) # (1536,)
normalized = (embeddings - mean) / std # (100, 1536) - (1536,) → broadcast!
# Cosine similarity between query and all docs
query = np.random.randn(1536) # (1536,)
docs = np.random.randn(100, 1536) # (100, 1536)
# Normalize
query_norm = query / np.linalg.norm(query)
docs_norm = docs / np.linalg.norm(docs, axis=1, keepdims=True)
# Dot product
similarities = docs_norm @ query_norm # shape (100,) — similarity to each doc
top5 = np.argsort(similarities)[::-1][:5] # indices of top 5
Indexing & Slicing
python
a = np.arange(10) # [0, 1, 2, ..., 9]
# Basic
a[3] # 3
a[-1] # 9
a[2:5] # [2, 3, 4]
a[::2] # [0, 2, 4, 6, 8]
a[::-1] # [9, 8, ..., 0]
# 2D
m = np.arange(12).reshape(3, 4)
m[1, 2] # element at row 1, col 2
m[1, :] # entire row 1
m[:, 2] # entire col 2
m[0:2, 1:3] # submatrix rows 0-1, cols 1-2
# Boolean indexing (masking)
a = np.array([1, -2, 3, -4, 5])
mask = a > 0
a[mask] # [1, 3, 5]
a[a > 0] = 0 # set positives to zero: [0, -2, 0, -4, 0]
# Fancy indexing
indices = np.array([0, 2, 4])
a[indices] # [a[0], a[2], a[4]]
# np.where (vectorized if/else)
np.where(a > 0, a, 0) # keep positives, replace everything else with 0
np.where(a > 0, "pos", "neg") # string labels
Math Operations
python
a = np.array([1.0, 4.0, 9.0])
# Element-wise
a + 1; a * 2; a ** 2; a / 2
np.sqrt(a) # [1.0, 2.0, 3.0]
np.log(a) # natural log
np.log2(a); np.log10(a)
np.exp(a) # e^x
np.abs(a)
np.sign(a) # -1, 0, 1
np.clip(a, 0, 5) # clamp values to [0, 5]
# Aggregation
a.sum(); a.mean(); a.std(); a.var()
a.min(); a.max()
a.argmin(); a.argmax() # index of min/max
# Axis-wise
m = np.random.randn(3, 4)
m.sum(axis=0) # shape (4,) — sum each column
m.sum(axis=1) # shape (3,) — sum each row
m.mean(axis=0, keepdims=True) # shape (1, 4) — keeps dims
# Linear algebra
np.dot(a, b) # dot product
a @ b # same (preferred)
np.linalg.norm(v) # L2 norm
np.linalg.norm(m, axis=1) # L2 norm per row
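np.linalg.inv(m) # inverse — m must be square and non-singular; the (3, 4) m above raises LinAlgError
np.linalg.det(m) # determinant — also requires a square matrix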
U, S, Vt = np.linalg.svd(m) # SVD decomposition
# Sorting
np.sort(a) # sorted copy
np.argsort(a) # indices that would sort a
np.argsort(a)[::-1] # descending (top-k pattern)
AI/ML Patterns in NumPy
python
# Softmax (neural network output layer)
def softmax(x: np.ndarray, axis=None) -> np.ndarray:
    """Return the softmax of ``x``: non-negative values that sum to 1.

    Args:
        x: Array (or array-like) of logits.
        axis: Axis along which to normalise. ``None`` (the default)
            normalises over every element of ``x``, which is the right
            choice for a single 1-D logit vector. For batched logits of
            shape ``(batch, classes)`` pass ``axis=-1`` so that each row
            sums to 1 independently.

    Returns:
        Array with the same shape as ``x``.
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large logits.
    shifted = x - x.max(axis=axis, keepdims=True)
    e = np.exp(shifted)
    # keepdims=True keeps the reduced axis as size 1 so the division
    # broadcasts back over the original shape.
    return e / e.sum(axis=axis, keepdims=True)
logits = np.array([2.0, 1.0, 0.1])
probs = softmax(logits) # [0.659, 0.242, 0.099]
# Sigmoid
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e^(-x))``, element-wise."""
    decay = np.exp(-x)
    return 1 / (1 + decay)
# ReLU
def relu(x):
    """Rectified linear unit: the element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified
# One-hot encoding
def one_hot(labels, num_classes, dtype=float):
    """One-hot encode integer class labels.

    Args:
        labels: Integer class indices (array or sequence).
        num_classes: Number of classes, i.e. the length of each one-hot row.
        dtype: Element type of the result. Defaults to ``float``, which
            matches ``np.eye``'s own default; pass e.g. ``np.float32`` or
            ``int`` for a smaller or integer encoding.

    Returns:
        Array of shape ``labels.shape + (num_classes,)``.
    """
    # Row i of the identity matrix is the one-hot vector for class i, so
    # indexing the identity with the label array looks up one row per label.
    return np.eye(num_classes, dtype=dtype)[labels]
labels = np.array([0, 2, 1])
one_hot(labels, 3)
# [[1,0,0], [0,0,1], [0,1,0]]
# Batch matrix multiply
# X: (batch, seq, d_model), W: (d_model, d_out)
# Result: (batch, seq, d_out)
result = X @ W # broadcasting handles batch dim
# Cosine similarity matrix (all vs all)
def cosine_similarity_matrix(A, B):
    """Return the cosine similarity between every row of A and every row of B.

    Args:
        A: 2-D array-like of shape ``(n, d)``, one vector per row.
        B: 2-D array-like of shape ``(m, d)``, one vector per row.

    Returns:
        Array of shape ``(n, m)`` whose ``[i, j]`` entry is the cosine
        similarity of ``A[i]`` and ``B[j]``. A row that is entirely zero
        has no direction; its similarities are reported as 0 rather than
        NaN.
    """
    def _unit_rows(M):
        # Scale each row to unit L2 length.
        M = np.asarray(M)
        norms = np.linalg.norm(M, axis=1, keepdims=True)
        # Dividing a zero row by 1 instead of 0 leaves it all-zero and
        # avoids a divide-by-zero that would turn the output into NaN.
        return M / np.where(norms == 0, 1.0, norms)

    return _unit_rows(A) @ _unit_rows(B).T  # shape (len(A), len(B))

# Memory & Performance
python
# dtype controls memory usage
arr_float64 = np.ones((1000, 1000)) # 8MB (default)
arr_float32 = np.ones((1000, 1000), dtype=np.float32) # 4MB
arr_int8 = np.ones((1000, 1000), dtype=np.int8) # 1MB
# ML models use float32 (GPU default), not float64
embeddings = embeddings.astype(np.float32)
# Views vs copies — views share memory (no copy, fast)
a = np.arange(10)
b = a[2:5] # view — modifying b modifies a
c = a[2:5].copy() # explicit copy
# Check if view or copy
b.base is a # True → b is a view of a
# Memory usage
arr.nbytes # total bytes
arr.itemsize # bytes per element
# Memmap — work with arrays larger than RAM
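fp = np.memmap("big_array.dat", dtype=np.float32, mode="w+", shape=(1_000_000, 1536)) # mode="w+" creates/overwrites a ~6.1 GB file on disk
fp[0] = embeddings[0] # assigned through the memory map (call fp.flush() to force it to disk); only the touched pages are loaded
[prev·next]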