# Tensors, autograd, models, training loops & common operations (AI / ML)
import torch
# Create tensors
x = torch.tensor([1, 2, 3])
x = torch.zeros(3, 4)
x = torch.ones(2, 3)
x = torch.randn(3, 4) # normal dist
x = torch.rand(3, 4) # uniform [0,1)
x = torch.arange(0, 10)
x = torch.linspace(0, 1, 100)
x = torch.eye(4)
x = torch.empty(3, 3)
# From numpy
x = torch.from_numpy(np_array)
np_array = x.numpy()
# Properties
x.shape  x.dtype  x.device  x.requires_grad
# Element-wise
x + y x - y x * y x / y x ** 2
torch.sqrt(x) torch.exp(x) torch.log(x)
torch.abs(x) torch.clamp(x, 0, 1)
# Matrix
x @ y # matmul
torch.mm(x, y) # matmul 2D
torch.bmm(x, y) # batch matmul
x.T # transpose
# Reduction
x.sum() x.mean() x.max() x.min()
x.sum(dim=0) x.argmax(dim=1)
# Reshape
x.view(2, -1) x.reshape(2, -1)
x.unsqueeze(0) x.squeeze()
x.flatten() x.permute(2,0,1)
torch.cat([x,y], dim=0)  torch.stack([x,y])
# Enable gradient tracking
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2 + 3 * x + 1
y.backward() # compute gradients
x.grad # dy/dx = 2x + 3 = 7
# Detach from graph
z = x.detach()
with torch.no_grad():
    # inference only
    pred = model(x)
# Zero gradients
optimizer.zero_grad()
import torch.nn as nn
class Net(nn.Module):
    """Two-layer MLP: 784-dim input (e.g. flattened 28x28 image) -> 10 logits."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)  # active only in train() mode

    def forward(self, x):
        # (N, 784) -> (N, 128) -> dropout -> (N, 10); returns raw logits
        # (pair with nn.CrossEntropyLoss, which applies log-softmax itself)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)
# Sequential
model = nn.Sequential(
nn.Linear(784, 128),
nn.ReLU(),
nn.Linear(128, 10)
)
# Common layers
nn.Linear nn.Conv2d nn.LSTM nn.GRU
nn.BatchNorm1d  nn.LayerNorm  nn.Embedding
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Standard train/validate loop: for each epoch, one optimization pass over
# train_loader followed by a gradient-free pass over val_loader.
for epoch in range(10):
    model.train()  # enable dropout / batchnorm training behavior
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()              # clear grads from the previous step
        output = model(batch_x)
        loss = criterion(output, batch_y)
        loss.backward()                    # backprop: populate .grad on params
        optimizer.step()                   # apply the parameter update
    # Validation
    model.eval()  # switch dropout / batchnorm to inference behavior
    with torch.no_grad():                  # skip autograd bookkeeping
        for val_x, val_y in val_loader:
            pred = model(val_x)
# Save / Load
torch.save(model.state_dict(), "model.pth")
model.load_state_dict(torch.load("model.pth"))
from torch.utils.data import Dataset, DataLoader
class MyDataset(Dataset):
    """Map-style Dataset wrapping array-like features X and integer labels y."""

    def __init__(self, X, y):
        # float32 features; int64 labels as required by nn.CrossEntropyLoss
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Return one (features, label) sample; DataLoader batches these.
        return self.X[idx], self.y[idx]
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
x = x.to(device)
# Apple Silicon
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
# Multi-GPU
model = nn.DataParallel(model)
# Memory
torch.cuda.empty_cache()
torch.cuda.memory_summary()