Schemas, queries, joins, indexes, transactions, window functions, JSON, performance tuning & more
-- Database / SQL

-- Connect
psql -h localhost -U postgres -d mydb

-- Create / Drop database
CREATE DATABASE mydb;
DROP DATABASE mydb;

-- List databases & tables (psql shortcuts)
\l        -- list databases
\c mydb   -- connect to database
\dt       -- list tables
\d users  -- describe table structure
\di       -- list indexes
\df       -- list functions
\du       -- list roles/users
\timing   -- toggle query timing

-- Table with common column types and constraints
CREATE TABLE users (
  id         SERIAL PRIMARY KEY,
  email      VARCHAR(255) UNIQUE NOT NULL,
  name       VARCHAR(100) NOT NULL,
  age        INT CHECK (age >= 0 AND age <= 150),    -- guard against impossible values
  role       VARCHAR(20) DEFAULT 'user',
  created_at TIMESTAMPTZ DEFAULT NOW()               -- TIMESTAMPTZ: prefer over naive TIMESTAMP
);
CREATE TABLE orders (
  id         SERIAL PRIMARY KEY,
  user_id    INT REFERENCES users(id) ON DELETE CASCADE,  -- delete a user's orders with the user
  total      DECIMAL(10,2) NOT NULL,                      -- exact numeric: never FLOAT for money
  status     VARCHAR(20) DEFAULT 'pending',
  ordered_at TIMESTAMPTZ DEFAULT NOW()
);

-- Evolving a schema with ALTER TABLE
ALTER TABLE users ADD COLUMN phone VARCHAR(20);
ALTER TABLE users DROP COLUMN phone;
ALTER TABLE users ALTER COLUMN name SET NOT NULL;
ALTER TABLE users RENAME COLUMN name TO full_name;
ALTER TABLE users ADD CONSTRAINT unique_email UNIQUE(email);  -- named constraint: greppable in errors

| Type | Description | Example |
|---|---|---|
| SERIAL / BIGSERIAL | Auto-increment integer | Primary keys |
| UUID | Universally unique identifier | gen_random_uuid() |
| VARCHAR(n) | Variable-length string up to n chars | Names, emails |
| TEXT | Unlimited-length string | Descriptions, content |
| INT / BIGINT | 32-bit / 64-bit integer | Counts, foreign keys |
| DECIMAL(p,s) | Exact numeric with precision | Money, prices |
| BOOLEAN | true / false | Flags |
| TIMESTAMPTZ | Timestamp with timezone | Always use this over TIMESTAMP |
| JSONB | Binary JSON (indexable, faster) | Flexible schema data |
| ARRAY | Array of any type | INT[], TEXT[] |
-- INSERT
INSERT INTO users (email, name, age) VALUES
  ('alice@example.com', 'Alice', 30),
  ('bob@example.com', 'Bob', 25);

-- INSERT with conflict handling (upsert) -- race-free, unlike SELECT-then-INSERT
INSERT INTO users (email, name)
VALUES ('alice@example.com', 'Alice Updated')
ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name;

-- SELECT
SELECT * FROM users WHERE age > 25 ORDER BY name LIMIT 10;
SELECT DISTINCT role FROM users;
SELECT * FROM users WHERE name ILIKE '%alice%'; -- case-insensitive (leading % defeats indexes)
SELECT * FROM users WHERE role IN ('admin', 'moderator');
-- Half-open range: BETWEEN is closed on both ends, so on timestamps it would
-- miss everything after midnight on the last day
SELECT * FROM users
WHERE created_at >= '2024-01-01' AND created_at < '2025-01-01';

-- UPDATE (always with WHERE unless you really mean the whole table)
UPDATE users SET role = 'admin' WHERE email = 'alice@example.com';

-- DELETE
DELETE FROM users WHERE age < 18;
TRUNCATE TABLE users RESTART IDENTITY CASCADE; -- fast delete all

-- RETURNING (read back affected rows without a follow-up SELECT)
INSERT INTO users (email, name) VALUES ('c@d.com', 'C') RETURNING id, email;
DELETE FROM users WHERE id = 5 RETURNING *;

-- INNER JOIN (only matching rows)
SELECT u.name, o.total
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- LEFT JOIN (all users, even without orders)
SELECT u.name, COALESCE(SUM(o.total), 0) AS total_spent  -- SUM over zero rows is NULL
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.name;

-- CROSS JOIN (every combination -- cartesian product)
SELECT * FROM colors CROSS JOIN sizes;

-- SELF JOIN (employees and their managers)
SELECT e.name AS employee, m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;

-- LATERAL JOIN (correlated subquery evaluated once per outer row)
SELECT u.name, latest.total
FROM users u
LEFT JOIN LATERAL (
  SELECT total FROM orders WHERE user_id = u.id
  ORDER BY ordered_at DESC LIMIT 1
) latest ON TRUE;

-- Basic aggregates
SELECT
  COUNT(*) AS total_users,              -- counts rows; COUNT(col) skips NULLs
  COUNT(DISTINCT role) AS unique_roles,
  AVG(age) AS avg_age,                  -- aggregates ignore NULL inputs
  MIN(age) AS youngest,
  MAX(age) AS oldest,
  SUM(age) AS age_sum
FROM users;

-- GROUP BY with HAVING (WHERE filters rows before grouping, HAVING filters groups after)
SELECT role, COUNT(*) AS cnt
FROM users
GROUP BY role
HAVING COUNT(*) > 5
ORDER BY cnt DESC;

-- GROUPING SETS (multiple groupings in one query)
-- NOTE(review): the users table defined above has no status column --
-- this example probably meant orders; verify against the intended schema
SELECT role, status, COUNT(*)
FROM users
GROUP BY GROUPING SETS ((role), (status), ());

-- String aggregation
SELECT role, STRING_AGG(name, ', ' ORDER BY name) AS members
FROM users GROUP BY role;

-- Array aggregation
SELECT role, ARRAY_AGG(name) AS member_list
FROM users GROUP BY role;

Window functions perform calculations across a set of rows related to the
current row -- without collapsing rows like GROUP BY.
-- ROW_NUMBER, RANK, DENSE_RANK
SELECT name, role, age,
  ROW_NUMBER() OVER (PARTITION BY role ORDER BY age DESC) AS row_num,
  RANK()       OVER (ORDER BY age DESC) AS rnk,
  DENSE_RANK() OVER (ORDER BY age DESC) AS dense_rnk
FROM users;

-- LAG / LEAD (previous/next row value)
SELECT ordered_at, total,
  total - LAG(total) OVER (ORDER BY ordered_at) AS change
FROM orders;

-- Running total
-- Explicit ROWS frame: the default RANGE frame lumps together all rows with
-- equal ordered_at values, producing surprising totals on ties
SELECT ordered_at, total,
  SUM(total) OVER (ORDER BY ordered_at
                   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total
FROM orders;

-- NTILE (divide into N buckets)
SELECT name, age,
  NTILE(4) OVER (ORDER BY age) AS quartile
FROM users;

-- Percent rank & percentile
SELECT name, age,
  PERCENT_RANK() OVER (ORDER BY age) AS pct_rank
FROM users;

-- ROW_NUMBER: always unique                  (1, 2, 3, 4, 5)
-- RANK:       ties share rank, gaps after    (1, 2, 2, 4, 5)
-- DENSE_RANK: ties share rank, no gaps       (1, 2, 2, 3, 4)

-- Common Table Expression (WITH clause)
WITH active_users AS (
  -- users that have not been deactivated
  SELECT * FROM users WHERE role != 'inactive'
),
user_orders AS (
  -- lifetime spend per user
  SELECT user_id, SUM(total) AS total_spent
  FROM orders GROUP BY user_id
)
SELECT a.name, COALESCE(uo.total_spent, 0) AS spent
FROM active_users a
LEFT JOIN user_orders uo ON a.id = uo.user_id;

-- Recursive CTE (org hierarchy)
WITH RECURSIVE org_tree AS (
  -- Base case: top-level managers
  SELECT id, name, manager_id, 1 AS depth
  FROM employees WHERE manager_id IS NULL
  UNION ALL
  -- Recursive case: find subordinates
  SELECT e.id, e.name, e.manager_id, t.depth + 1
  FROM employees e
  JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree ORDER BY depth, name;

-- Subquery in WHERE
-- (beware NOT IN when the subquery column is nullable -- use NOT EXISTS instead)
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE total > 100);

-- EXISTS (stops at the first match; often better plans than IN on large sets)
SELECT * FROM users u
WHERE EXISTS (
  SELECT 1 FROM orders o WHERE o.user_id = u.id AND o.total > 100
);

Indexes speed up reads but slow down writes. Choose wisely based on query patterns.
-- B-tree (default, most common)
CREATE INDEX idx_users_email ON users(email);

-- Unique index
CREATE UNIQUE INDEX idx_users_email_unique ON users(email);

-- Composite index (multi-column; column order matters for prefix lookups)
CREATE INDEX idx_orders_user_date ON orders(user_id, ordered_at DESC);

-- Partial index (only index a subset of rows)
CREATE INDEX idx_active_users ON users(email) WHERE role = 'active';

-- GIN index (for JSONB, arrays, full-text search)
CREATE INDEX idx_users_metadata ON users USING GIN(metadata);

-- GiST index (for geometric, range, full-text)
CREATE INDEX idx_locations_geo ON locations USING GIST(coordinates);

-- Expression index (lets WHERE LOWER(email) = ... use an index)
CREATE INDEX idx_users_lower_email ON users(LOWER(email));

-- Concurrent index creation (non-blocking; cannot run inside a transaction)
CREATE INDEX CONCURRENTLY idx_name ON users(name);

-- Drop index
DROP INDEX idx_users_email;

| Index Type | Best For | Example |
|---|---|---|
| B-tree | =, <, >, BETWEEN, LIKE 'abc%' | Most columns |
| Hash | Equality (=) only | Exact lookups |
| GIN | JSONB, arrays, full-text, tsvector | metadata @> '{"a":1}' |
| GiST | Geometry, ranges, nearest-neighbor | PostGIS, range queries |
| BRIN | Very large, naturally ordered tables | Time-series data (created_at) |
BEGIN; -- or START TRANSACTION
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
-- If everything is OK:
COMMIT;
-- If something went wrong:
ROLLBACK;

-- Savepoints (partial rollback)
BEGIN;
INSERT INTO users (...) VALUES (...);
SAVEPOINT sp1;
INSERT INTO orders (...) VALUES (...); -- this might fail
ROLLBACK TO sp1; -- undo only the order insert
COMMIT; -- user insert is still committed

| Level | Dirty Read | Non-Repeatable Read | Phantom Read | Use Case |
|---|---|---|---|---|
| Read Uncommitted | Possible | Possible | Possible | Almost never used in Postgres |
| Read Committed | No | Possible | Possible | Default in PostgreSQL |
| Repeatable Read | No | No | No* | Financial reports, analytics |
| Serializable | No | No | No | Strictest — as if sequential |
* PostgreSQL's Repeatable Read is based on MVCC snapshots and prevents phantom reads too.
Use JSONB over JSON — it's binary, faster, and indexable.
-- Create table with JSONB
CREATE TABLE events (
  id   SERIAL PRIMARY KEY,
  data JSONB NOT NULL
);

-- Insert JSON
INSERT INTO events (data) VALUES
  ('{"type": "click", "page": "/home", "user_id": 42}'::jsonb);

-- Access JSON fields
SELECT
  data->'type'  AS type_json,           -- returns JSONB: "click"
  data->>'type' AS type_text,           -- returns TEXT: click
  data->'user'->'name' AS nested_json,  -- nested access
  data#>>'{user,name}' AS nested_path   -- path access as text
FROM events;

-- Filter by JSON field
SELECT * FROM events WHERE data->>'type' = 'click';

-- Containment operator (uses GIN index!)
SELECT * FROM events WHERE data @> '{"type": "click"}'::jsonb;

-- Update JSON fields (no WHERE here: each statement rewrites every row)
UPDATE events SET data = data || '{"processed": true}'::jsonb;
UPDATE events SET data = data - 'temporary_field'; -- remove key
UPDATE events SET data = jsonb_set(data, '{page}', '"/"');

-- Aggregate to JSON
SELECT jsonb_agg(jsonb_build_object('name', name, 'age', age))
FROM users;

-- Regular view (virtual -- query runs every time)
CREATE VIEW active_users AS
SELECT id, name, email FROM users WHERE role = 'active';

SELECT * FROM active_users; -- uses the view

-- Materialized view (cached -- must refresh manually)
CREATE MATERIALIZED VIEW user_stats AS
SELECT u.id, u.name,       -- u.name allowed: functionally dependent on PK u.id
  COUNT(o.id) AS order_count,
  COALESCE(SUM(o.total), 0) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id;

-- Refresh materialized view
REFRESH MATERIALIZED VIEW CONCURRENTLY user_stats;
-- CONCURRENTLY requires a UNIQUE index on the view

-- Function (returns a value)
CREATE OR REPLACE FUNCTION get_user_total(uid INT)
RETURNS DECIMAL AS $$
BEGIN
  RETURN (
    SELECT COALESCE(SUM(total), 0)  -- SUM over zero rows is NULL, so coalesce to 0
    FROM orders WHERE user_id = uid
  );
END;
$$ LANGUAGE plpgsql;

-- Use it
SELECT name, get_user_total(id) FROM users;

-- Procedure (no return value, can manage transactions)
CREATE OR REPLACE PROCEDURE transfer_funds(
  from_id INT, to_id INT, amount DECIMAL
) AS $$
BEGIN
  UPDATE accounts SET balance = balance - amount WHERE id = from_id;
  UPDATE accounts SET balance = balance + amount WHERE id = to_id;
  COMMIT;  -- valid in procedures, but only when CALL runs outside an explicit transaction
END;
$$ LANGUAGE plpgsql;

-- Call procedure
CALL transfer_funds(1, 2, 50.00);

-- Show query plan with actual execution time
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id)
FROM users u
JOIN orders o ON u.id = o.user_id
GROUP BY u.name;

-- Key things to look for:
--   Seq Scan                        -> consider adding an index
--   Nested Loop with high row count -> may need a different join strategy
--   Sort                            -> add an index for ORDER BY columns
--   actual vs estimated rows差 large -> stale statistics; run ANALYZE

-- Update statistics
ANALYZE users;
ANALYZE; -- whole database

-- Performance quick wins:
--   Avoid SELECT * -- only fetch the columns you need
--   Paginate with LIMIT/OFFSET, or better, keyset pagination
--   Run VACUUM ANALYZE to reclaim space and update stats
--   Use COPY for bulk loading instead of many INSERTs

-- Database size
SELECT pg_size_pretty(pg_database_size('mydb'));

-- Table sizes
-- quote_ident handles mixed-case or reserved-word table names that a bare
-- tablename::regclass cast would fail on (regclass resolves via search_path)
SELECT tablename,
  pg_size_pretty(pg_total_relation_size(
    quote_ident(schemaname) || '.' || quote_ident(tablename))) AS total_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) DESC;

-- Active queries
SELECT pid, query, state, now() - query_start AS duration
FROM pg_stat_activity
WHERE state != 'idle' ORDER BY duration DESC;

-- Kill a slow query (substitute the pid found in pg_stat_activity)
SELECT pg_terminate_backend(pid);

-- Backup & Restore (shell commands, not SQL)
-- pg_dump -U postgres -d mydb -F c -f backup.dump
-- pg_restore -U postgres -d mydb backup.dump

-- Roles & Permissions
CREATE ROLE readonly LOGIN PASSWORD 'secret';
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly;
-- Default privileges cover tables created AFTER the grant above
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO readonly;