Python Tooling — Environments, Testing, and Dev Setup
Environment Management
venv (built-in, simplest)
bash# Create
python -m venv .venv
# Activate
source .venv/bin/activate # Mac/Linux
.venv\Scripts\activate # Windows
# Install & freeze
pip install fastapi openai
pip freeze > requirements.txt
pip install -r requirements.txt
# Deactivate
deactivateuv (2024+ — fastest, recommended)
uv is written in Rust and replaces pip + venv in one tool. 10–100x faster than pip.
bash# Install uv
curl -LsSf https://astral.sh/uv/install.sh | sh
# New project
uv init my-project
cd my-project
# Add dependencies (like npm install)
uv add fastapi openai anthropic
# Add dev dependencies
uv add --dev pytest ruff mypy
# Run in the project's venv
uv run python main.py
uv run pytest
# Sync from pyproject.toml (like npm ci)
uv sync
# Generate lock file
uv lockpyproject.toml (modern standard)
toml[project]
name = "my-ai-app"
version = "0.1.0"
requires-python = ">=3.11"
dependencies = [
"fastapi>=0.115",
"openai>=1.0",
"anthropic>=0.40",
"langchain>=0.3",
"chromadb>=0.5",
"sentence-transformers>=3.0",
"numpy>=1.26",
"pandas>=2.0",
"torch>=2.2",
]
[project.optional-dependencies]
dev = ["pytest", "ruff", "mypy", "httpx"]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.ruff]
line-length = 100
target-version = "py311"Code Quality Tools
ruff (fast linter + formatter, replaces flake8/black/isort)
bash# Install
uv add --dev ruff
# Lint
ruff check .
# Fix auto-fixable issues
ruff check . --fix
# Format (like black)
ruff format .
# Config in pyproject.tomltoml[tool.ruff.lint]
select = ["E", "F", "I", "N", "W", "UP"] # errors, pyflakes, isort, naming, warnings, upgrades
ignore = ["E501"] # line too long (handled by formatter)mypy (static type checking)
bashmypy src/ # check types
mypy --strict src/ # strict mode — recommended for new projectspython# Type hints (essential for production AI code)
from typing import Optional, Union
from collections.abc import Iterator, AsyncIterator
def chunk_text(text: str, size: int = 500) -> list[str]: ...
async def stream_tokens(prompt: str) -> AsyncIterator[str]: ...
def get_embedding(text: str) -> list[float]: ...pytest — Testing AI Code
Basic Structure
python# tests/test_chunking.py
import pytest
from src.chunking import chunk_text
def test_basic_chunking():
text = " ".join(["word"] * 100)
chunks = chunk_text(text, chunk_size=20, overlap=5)
assert len(chunks) > 1
assert all(len(c.split()) <= 20 for c in chunks)
def test_empty_text():
assert chunk_text("") == []
def test_single_chunk():
chunks = chunk_text("hello world", chunk_size=100)
assert len(chunks) == 1
assert chunks[0] == "hello world"Fixtures — Shared Setup
python# conftest.py (auto-loaded by pytest)
import pytest
from unittest.mock import AsyncMock, MagicMock
@pytest.fixture
def sample_documents():
return [
{"id": "1", "text": "Python is great for AI", "label": 1},
{"id": "2", "text": "JavaScript runs in browsers", "label": 0},
]
@pytest.fixture
def mock_openai_client():
client = AsyncMock()
client.chat.completions.create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content="Mocked response"))]
)
return client
# Use in test
def test_with_docs(sample_documents):
assert len(sample_documents) == 2
async def test_llm_call(mock_openai_client):
result = await call_llm(mock_openai_client, "test prompt")
assert result == "Mocked response"Parametrize — Run Test with Multiple Inputs
python@pytest.mark.parametrize("text, chunk_size, expected_count", [
("word " * 100, 20, 6), # 100 words / (20 - 5 overlap) = 6ish
("short text", 100, 1), # fits in one chunk
("", 50, 0), # empty text
])
def test_chunk_count(text, chunk_size, expected_count):
chunks = chunk_text(text.strip(), chunk_size=chunk_size, overlap=5)
assert len(chunks) == expected_countMocking External APIs
pythonfrom unittest.mock import patch, AsyncMock, MagicMock
import pytest
# Patch at the import location (where it's used, not where it's defined)
@patch('src.llm.openai_client')
async def test_rag_pipeline(mock_client):
mock_client.chat.completions.create = AsyncMock(return_value=MagicMock(
choices=[MagicMock(message=MagicMock(content="answer"))]
))
result = await rag_answer("What is Python?")
assert "answer" in result
mock_client.chat.completions.create.assert_called_once()
# Context manager style (preferred in many cases)
def test_embedding():
with patch('src.embeddings.get_embedding') as mock_embed:
mock_embed.return_value = [0.1, 0.2, 0.3]
result = search("query")
mock_embed.assert_called_with("query")Testing Async Code
pythonimport pytest
import asyncio
# Mark as async — requires pytest-asyncio
@pytest.mark.asyncio
async def test_async_function():
result = await some_async_function()
assert result is not None
# conftest.py — set asyncio mode
# Or in pyproject.toml:toml[tool.pytest.ini_options]
asyncio_mode = "auto" # automatically treat async tests as async
testpaths = ["tests"]Testing ML Models (Smoke Tests)
pythonimport pytest
import numpy as np
@pytest.fixture(scope="module")
def trained_model():
"""Load once per test module — expensive models."""
from sklearn.ensemble import RandomForestClassifier
import joblib
return joblib.load("model.pkl")
def test_model_output_shape(trained_model):
X = np.random.randn(10, 20) # 10 samples, 20 features
preds = trained_model.predict(X)
assert preds.shape == (10,)
def test_model_probabilities(trained_model):
X = np.random.randn(5, 20)
probs = trained_model.predict_proba(X)
assert probs.shape == (5, 2) # binary classification
assert np.allclose(probs.sum(axis=1), 1.0) # probabilities sum to 1
def test_model_output_range(trained_model):
X = np.random.randn(100, 20)
preds = trained_model.predict(X)
assert set(preds).issubset({0, 1}) # only valid classespytest Commands
bash# Run all tests
pytest
# Verbose
pytest -v
# Run specific file
pytest tests/test_chunking.py
# Run specific test
pytest tests/test_chunking.py::test_basic_chunking
# Stop on first failure
pytest -x
# Show stdout (print statements)
pytest -s
# Coverage
pytest --cov=src --cov-report=term-missing
# Parallel execution (pip install pytest-xdist)
pytest -n 4Project Structure Best Practice
my-ai-project/
├── pyproject.toml # dependencies, tool config
├── uv.lock # lockfile (commit this)
├── src/
│ └── my_app/
│ ├── __init__.py
│ ├── chunking.py
│ ├── embeddings.py
│ ├── rag.py
│ └── api.py # FastAPI app
├── tests/
│ ├── conftest.py # shared fixtures
│ ├── test_chunking.py
│ ├── test_embeddings.py
│ └── test_api.py # FastAPI TestClient tests
├── scripts/
│ └── ingest.py # one-off scripts
├── .env # API keys (never commit!)
├── .env.example # template (commit this)
└── DockerfileEnvironment Variables (python-dotenv)
python# .env
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
CHROMA_HOST=localhost
# config.py
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
openai_api_key: str
anthropic_api_key: str
chroma_host: str = "localhost"
class Config:
env_file = ".env"
settings = Settings()FastAPI TestClient
pythonfrom fastapi.testclient import TestClient
from src.api import app
client = TestClient(app)
def test_health():
response = client.get("/health")
assert response.status_code == 200
assert response.json() == {"status": "ok"}
def test_ask_endpoint():
response = client.post("/ask", json={"question": "What is Python?"})
assert response.status_code == 200
assert "answer" in response.json()
# Async endpoints — use httpx.AsyncClient
import pytest
import httpx
@pytest.mark.asyncio
async def test_streaming():
async with httpx.AsyncClient(app=app, base_url="http://test") as ac:
response = await ac.post("/stream", json={"question": "test"})
assert response.status_code == 200Interview Q&A
Q: How do you test code that makes LLM API calls?
Mock the API client so tests don't make real calls (slow + cost money). Use unittest.mock.patch to replace the client at the import path. Assert that the mock was called with the expected arguments, and set return_value to a realistic response object.
Q: What is conftest.py in pytest?
A special file pytest automatically discovers. It contains fixtures that are shared across multiple test files without explicit imports. Fixtures in conftest.py at the project root are available to all tests. You can have multiple conftest.py files at different directory levels.
Q: What's the difference between scope="function" and scope="module" for fixtures?
scope="function" (default) — fixture created fresh for every test function. scope="module" — created once per test file, shared across all tests in that file. For expensive setup like loading ML models, use scope="module" or scope="session" (once for the entire test run). The tradeoff is test isolation vs speed.