Debug Guide¶
Tools and techniques for debugging IOValence applications.
Enable Debug Logging¶
import logging
from iovalence import Trainer
# Configure the root logger to emit DEBUG-level records. basicConfig() has no
# effect if the root logger already has handlers configured.
logging.basicConfig(level=logging.DEBUG)
# `agent` and `data_loader` are assumed to have been created beforehand.
# NOTE(review): `verbose=True` presumably enables extra Trainer output —
# confirm against the Trainer API.
trainer = Trainer(agent, data_loader, verbose=True)
Debug Trainer¶
from iovalence.debug import DebugTrainer
# Wrap the existing agent and data loader in a DebugTrainer and run training.
# NOTE(review): what `debug=True` switches on is not shown on this page —
# confirm against the iovalence.debug API.
debug_trainer = DebugTrainer(
    agent=agent,
    data_loader=data_loader,
    debug=True,
)
debug_trainer.train()
Profiling¶
Profile Training Speed¶
from iovalence.profiling import profile_training
# Run the profiler on `trainer` with num_steps=100 and print the result.
# NOTE(review): the structure of the returned object is not shown here.
profile = profile_training(trainer, num_steps=100)
print(profile)
Profile Memory Usage¶
from iovalence.profiling import profile_memory
# NOTE(review): the message below labels the value as megabytes; confirm that
# profile_memory() really reports MB.
memory = profile_memory(agent, data_loader)
print(f"Memory usage: {memory} MB")
Validation Checks¶
Data Validation¶
from iovalence.validation import validate_data
# Run the data checks on the loader and print whatever they report.
# NOTE(review): presumably an empty result means no problems were found —
# confirm against the validate_data() documentation.
issues = validate_data(data_loader)
print(f"Data issues: {issues}")
Model Validation¶
from iovalence.validation import validate_model
# Run the model checks on the agent and print whatever they report.
# NOTE(review): presumably an empty result means no problems were found —
# confirm against the validate_model() documentation.
issues = validate_model(agent)
print(f"Model issues: {issues}")
Interactive Debugging¶
import pdb
from iovalence import Trainer
trainer = Trainer(agent, data_loader)
# Set breakpoint
# Execution pauses at the pdb prompt just before training starts: use `s` to
# step into trainer.train(), `n` to step over it, or `c` to continue.
pdb.set_trace()
trainer.train()
Tensor Inspection¶
import torch
# Check tensor properties
x = torch.randn(32, 512)
print(f"Shape: {x.shape}")
print(f"Device: {x.device}")
print(f"Dtype: {x.dtype}")
print(f"Requires grad: {x.requires_grad}")
# Check for NaN/Inf
# .any() returns a 0-dim tensor; .item() converts it to a plain Python bool so
# the output reads "False" rather than "tensor(False)".
has_nan = torch.isnan(x).any().item()
has_inf = torch.isinf(x).any().item()
print(f"Has NaN: {has_nan}")
print(f"Has Inf: {has_inf}")
Gradient Inspection¶
# Inspect gradients
# Walk every named parameter of the agent and report gradient statistics.
for name, param in agent.named_parameters():
    # Parameters that took no part in the last backward pass (or before any
    # backward pass has run) have grad set to None, so skip them.
    if param.grad is not None:
        print(f"{name}: grad_mean={param.grad.mean()}, grad_max={param.grad.max()}")
Hook Debugging¶
# Register forward hook
def debug_hook(module, input, output):
    """Forward hook that prints the module and the shape of its output.

    The signature (module, input, output) is the one expected by
    ``register_forward_hook``; the parameter name ``input`` is kept for that
    reason even though it shadows the builtin.

    Args:
        module: The module whose forward pass just ran.
        input: The positional inputs passed to the module (unused here).
        output: The value returned by the module's forward pass.

    Returns:
        None, so the module's output is left unchanged.
    """
    print(f"Module: {module}")
    # Some modules return a tuple/list of values rather than a single tensor;
    # report each element's shape instead of failing on a missing `.shape`.
    if isinstance(output, (tuple, list)):
        shapes = [getattr(item, "shape", None) for item in output]
        print(f"Output shape: {shapes}")
    else:
        print(f"Output shape: {getattr(output, 'shape', None)}")
# Attach the hook to the agent's `conv1` submodule so it fires on every forward
# pass through that layer.
# NOTE(review): assuming `agent` is a torch.nn.Module, this call returns a
# handle; keep it and call .remove() once debugging is finished.
agent.conv1.register_forward_hook(debug_hook)
Visualization¶
Plot Training History¶
from iovalence.visualization import plot_history
# Read the history recorded on the trainer and plot the two requested metrics.
# NOTE(review): assumes trainer.history contains 'loss' and 'accuracy' entries
# once training has run — confirm against the Trainer API.
history = trainer.history
plot_history(history, metrics=['loss', 'accuracy'])
Visualize Model¶
from iovalence.visualization import visualize_model
# NOTE(review): input_shape=(1, 512) presumably describes one sample with 512
# features; adjust it to whatever input the agent actually expects.
visualize_model(agent, input_shape=(1, 512))
Common Debug Patterns¶
Check Device Mismatch¶
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move both the model and its input to the same device; mixing devices is what
# triggers the mismatch error.
agent = agent.to(device)
# NOTE(review): assuming `data` is a tensor, .to() returns a new tensor, so the
# reassignment here is required.
data = data.to(device)
Check Dimension Mismatch¶
# Run a forward pass and, if it fails, print the error together with the input
# shape so the mismatching dimension is easy to spot.
try:
    output = agent(input_data)
except RuntimeError as e:
    # The error is reported rather than re-raised so the debugging session
    # can continue after the failed forward pass.
    print(f"Dimension error: {e}")
    print(f"Input shape: {input_data.shape}")
Check for Silent Failures¶
# Enable autograd anomaly detection for the duration of training only; it is
# switched back to its previous state when the block exits. Anomaly detection
# slows training down, so use it for debugging runs only.
with torch.autograd.set_detect_anomaly(True):
    trainer.train()
Performance Debugging¶
# Identify bottlenecks
import time
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump when the system clock is adjusted.
start = time.perf_counter()
# Your code here
end = time.perf_counter()
print(f"Execution time: {end - start:.4f} seconds")