PostgreSQL Cheatsheet

Schemas, queries, joins, indexes, transactions, window functions, JSON, performance tuning & more

Database / SQL
Contents
๐Ÿ˜

Database Basics

-- Connect
psql -h localhost -U postgres -d mydb

-- Create / Drop database
CREATE DATABASE mydb;
DROP DATABASE mydb;

-- List databases & tables (psql shortcuts)
\l                  -- list databases
\c mydb             -- connect to database
\dt                 -- list tables
\d users            -- describe table structure
\di                 -- list indexes
\df                 -- list functions
\du                 -- list roles/users
\timing             -- toggle query timing
๐Ÿ—๏ธ

DDL โ€” Schema Definition

Create Table

CREATE TABLE users (
    id          SERIAL PRIMARY KEY,
    email       VARCHAR(255) UNIQUE NOT NULL,
    name        VARCHAR(100) NOT NULL,
    age         INT CHECK (age >= 0 AND age <= 150),
    role        VARCHAR(20) DEFAULT 'user',
    created_at  TIMESTAMPTZ DEFAULT NOW()
);

CREATE TABLE orders (
    id          SERIAL PRIMARY KEY,
    user_id     INT REFERENCES users(id) ON DELETE CASCADE,
    total       DECIMAL(10,2) NOT NULL,
    status      VARCHAR(20) DEFAULT 'pending',
    ordered_at  TIMESTAMPTZ DEFAULT NOW()
);

Alter Table

ALTER TABLE users ADD COLUMN phone VARCHAR(20);
ALTER TABLE users DROP COLUMN phone;
ALTER TABLE users ALTER COLUMN name SET NOT NULL;
ALTER TABLE users RENAME COLUMN name TO full_name;
ALTER TABLE users ADD CONSTRAINT unique_email UNIQUE(email);

Common Data Types

TypeDescriptionExample
SERIAL / BIGSERIALAuto-increment integerPrimary keys
UUIDUniversally unique identifiergen_random_uuid()
VARCHAR(n)Variable-length string up to n charsNames, emails
TEXTUnlimited-length stringDescriptions, content
INT / BIGINT32-bit / 64-bit integerCounts, foreign keys
DECIMAL(p,s)Exact numeric with precisionMoney, prices
BOOLEANtrue / falseFlags
TIMESTAMPTZTimestamp with timezoneAlways use this over TIMESTAMP
JSONBBinary JSON (indexable, faster)Flexible schema data
ARRAYArray of any typeINT[], TEXT[]
โœ๏ธ

DML โ€” CRUD Operations

-- INSERT
INSERT INTO users (email, name, age) VALUES
    ('alice@example.com', 'Alice', 30),
    ('bob@example.com', 'Bob', 25);

-- INSERT with conflict handling (upsert)
INSERT INTO users (email, name)
VALUES ('alice@example.com', 'Alice Updated')
ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name;

-- SELECT
SELECT * FROM users WHERE age > 25 ORDER BY name LIMIT 10;
SELECT DISTINCT role FROM users;
SELECT * FROM users WHERE name ILIKE '%alice%';  -- case-insensitive
SELECT * FROM users WHERE role IN ('admin', 'moderator');
SELECT * FROM users WHERE created_at BETWEEN '2024-01-01' AND '2024-12-31';

-- UPDATE
UPDATE users SET role = 'admin' WHERE email = 'alice@example.com';

-- DELETE
DELETE FROM users WHERE age < 18;
TRUNCATE TABLE users RESTART IDENTITY CASCADE;  -- fast delete all

-- RETURNING (get affected rows)
INSERT INTO users (email, name) VALUES ('c@d.com', 'C') RETURNING id, email;
DELETE FROM users WHERE id = 5 RETURNING *;
๐Ÿ”—

Joins

INNER JOIN LEFT JOIN RIGHT JOIN FULL OUTER JOIN โ”Œโ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ” โ”‚ A โ”‚โ–“โ–“โ–“โ”‚ โ”‚โ–“โ–“โ–“โ”‚โ–“โ–“โ–“โ”‚ โ”‚ โ”‚โ–“โ–“โ–“โ”‚ โ”‚โ–“โ–“โ–“โ”‚โ–“โ–“โ–“โ”‚ โ”‚ โ”‚โ–“โ–“โ–“โ”‚ B โ”‚โ–“โ–“โ–“โ”‚โ–“โ–“โ–“โ”‚ B โ”‚ โ”‚โ–“โ–“โ–“โ”‚ B โ”‚โ–“โ–“โ–“โ”‚โ–“โ–“โ–“โ”‚ B โ”‚ โ”‚โ–“โ–“โ–“โ”‚ โ”‚โ–“โ–“โ–“โ”‚โ–“โ–“โ–“โ”‚ โ”‚ โ”‚โ–“โ–“โ–“โ”‚ โ”‚โ–“โ–“โ–“โ”‚โ–“โ–“โ–“โ”‚ โ””โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”˜ Only matching All A + matching Matching + All B All A + All B
-- INNER JOIN (only matching rows)
SELECT u.name, o.total
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- LEFT JOIN (all users, even without orders)
SELECT u.name, COALESCE(SUM(o.total), 0) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.name;

-- CROSS JOIN (every combination โ€” cartesian product)
SELECT * FROM colors CROSS JOIN sizes;

-- SELF JOIN (employees and their managers)
SELECT e.name AS employee, m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;

-- LATERAL JOIN (correlated subquery for each row)
SELECT u.name, latest.total
FROM users u
LEFT JOIN LATERAL (
    SELECT total FROM orders WHERE user_id = u.id
    ORDER BY ordered_at DESC LIMIT 1
) latest ON TRUE;
๐Ÿ“Š

Aggregation

-- Basic aggregates
SELECT
    COUNT(*) AS total_users,
    COUNT(DISTINCT role) AS unique_roles,
    AVG(age) AS avg_age,
    MIN(age) AS youngest,
    MAX(age) AS oldest,
    SUM(age) AS age_sum
FROM users;

-- GROUP BY with HAVING
SELECT role, COUNT(*) AS cnt
FROM users
GROUP BY role
HAVING COUNT(*) > 5
ORDER BY cnt DESC;

-- GROUPING SETS (multiple groupings in one query)
SELECT role, status, COUNT(*)
FROM users
GROUP BY GROUPING SETS ((role), (status), ());

-- String aggregation
SELECT role, STRING_AGG(name, ', ' ORDER BY name) AS members
FROM users GROUP BY role;

-- Array aggregation
SELECT role, ARRAY_AGG(name) AS member_list
FROM users GROUP BY role;
๐ŸชŸ

Window Functions

Window functions perform calculations across a set of rows related to the current row โ€” without collapsing rows like GROUP BY.

-- ROW_NUMBER, RANK, DENSE_RANK
SELECT name, role, age,
    ROW_NUMBER() OVER (PARTITION BY role ORDER BY age DESC) AS row_num,
    RANK()       OVER (ORDER BY age DESC) AS rnk,
    DENSE_RANK() OVER (ORDER BY age DESC) AS dense_rnk
FROM users;

-- LAG / LEAD (previous/next row value)
SELECT ordered_at, total,
    total - LAG(total) OVER (ORDER BY ordered_at) AS change
FROM orders;

-- Running total
SELECT ordered_at, total,
    SUM(total) OVER (ORDER BY ordered_at) AS running_total
FROM orders;

-- NTILE (divide into N buckets)
SELECT name, age,
    NTILE(4) OVER (ORDER BY age) AS quartile
FROM users;

-- Percent rank & percentile
SELECT name, age,
    PERCENT_RANK() OVER (ORDER BY age) AS pct_rank
FROM users;
๐Ÿ’ก RANK vs DENSE_RANK vs ROW_NUMBER
  • ROW_NUMBER: Always unique (1, 2, 3, 4, 5)
  • RANK: Ties get same rank, gaps after (1, 2, 2, 4, 5)
  • DENSE_RANK: Ties get same rank, no gaps (1, 2, 2, 3, 4)
๐Ÿ”

CTEs & Subqueries

-- Common Table Expression (WITH clause)
WITH active_users AS (
    SELECT * FROM users WHERE role != 'inactive'
),
user_orders AS (
    SELECT user_id, SUM(total) AS total_spent
    FROM orders GROUP BY user_id
)
SELECT a.name, COALESCE(uo.total_spent, 0) AS spent
FROM active_users a
LEFT JOIN user_orders uo ON a.id = uo.user_id;

-- Recursive CTE (org hierarchy)
WITH RECURSIVE org_tree AS (
    -- Base case: top-level managers
    SELECT id, name, manager_id, 1 AS depth
    FROM employees WHERE manager_id IS NULL

    UNION ALL

    -- Recursive case: find subordinates
    SELECT e.id, e.name, e.manager_id, t.depth + 1
    FROM employees e
    JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree ORDER BY depth, name;

-- Subquery in WHERE
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE total > 100);

-- EXISTS (more efficient than IN for large sets)
SELECT * FROM users u
WHERE EXISTS (
    SELECT 1 FROM orders o WHERE o.user_id = u.id AND o.total > 100
);
โšก

Indexes

Indexes speed up reads but slow down writes. Choose wisely based on query patterns.

-- B-tree (default, most common)
CREATE INDEX idx_users_email ON users(email);

-- Unique index
CREATE UNIQUE INDEX idx_users_email_unique ON users(email);

-- Composite index (multi-column)
CREATE INDEX idx_orders_user_date ON orders(user_id, ordered_at DESC);

-- Partial index (only index subset of rows)
CREATE INDEX idx_active_users ON users(email) WHERE role = 'active';

-- GIN index (for JSONB, arrays, full-text search)
CREATE INDEX idx_users_metadata ON users USING GIN(metadata);

-- GiST index (for geometric, range, full-text)
CREATE INDEX idx_locations_geo ON locations USING GIST(coordinates);

-- Expression index
CREATE INDEX idx_users_lower_email ON users(LOWER(email));

-- Concurrent index creation (non-blocking)
CREATE INDEX CONCURRENTLY idx_name ON users(name);

-- Drop index
DROP INDEX idx_users_email;
Index TypeBest ForExample
B-tree=, <, >, BETWEEN, LIKE 'abc%'Most columns
HashEquality (=) onlyExact lookups
GINJSONB, arrays, full-text, tsvectormetadata @> '{"a":1}'
GiSTGeometry, ranges, nearest-neighborPostGIS, range queries
BRINVery large, naturally ordered tablesTime-series data (created_at)
๐Ÿ”

Transactions

ACID Properties

BEGIN;  -- or START TRANSACTION

UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;

-- If everything is OK:
COMMIT;

-- If something went wrong:
ROLLBACK;

-- Savepoints (partial rollback)
BEGIN;
INSERT INTO users (...) VALUES (...);
SAVEPOINT sp1;
INSERT INTO orders (...) VALUES (...);  -- this might fail
ROLLBACK TO sp1;  -- undo only the order insert
COMMIT;  -- user insert is still committed

Isolation Levels

LevelDirty ReadNon-Repeatable ReadPhantom ReadUse Case
Read Uncommittedโœ… Possibleโœ… Possibleโœ… PossibleAlmost never used in Postgres
Read CommittedโŒ Noโœ… Possibleโœ… PossibleDefault in PostgreSQL
Repeatable ReadโŒ NoโŒ NoโŒ No*Financial reports, analytics
SerializableโŒ NoโŒ NoโŒ NoStrictest โ€” as if sequential

* PostgreSQL's Repeatable Read is based on MVCC snapshots and prevents phantom reads too.

๐Ÿ“‹

JSON Operations

Use JSONB over JSON โ€” it's binary, faster, and indexable.

-- Create table with JSONB
CREATE TABLE events (
    id    SERIAL PRIMARY KEY,
    data  JSONB NOT NULL
);

-- Insert JSON
INSERT INTO events (data) VALUES
    ('{"type": "click", "page": "/home", "user_id": 42}'::jsonb);

-- Access JSON fields
SELECT
    data->'type'          AS type_json,    -- returns JSONB: "click"
    data->>'type'         AS type_text,    -- returns TEXT: click
    data->'user'->'name'  AS nested_json,  -- nested access
    data#>>'{user,name}'  AS nested_path   -- path access as text
FROM events;

-- Filter by JSON field
SELECT * FROM events WHERE data->>'type' = 'click';

-- Containment operator (uses GIN index!)
SELECT * FROM events WHERE data @> '{"type": "click"}'::jsonb;

-- Update JSON fields
UPDATE events SET data = data || '{"processed": true}'::jsonb;
UPDATE events SET data = data - 'temporary_field';  -- remove key
UPDATE events SET data = jsonb_set(data, '{page}', '"/"');

-- Aggregate to JSON
SELECT jsonb_agg(jsonb_build_object('name', name, 'age', age))
FROM users;
๐Ÿ‘๏ธ

Views & Materialized Views

-- Regular view (virtual โ€” query runs every time)
CREATE VIEW active_users AS
    SELECT id, name, email FROM users WHERE role = 'active';

SELECT * FROM active_users;  -- uses the view

-- Materialized view (cached โ€” must refresh manually)
CREATE MATERIALIZED VIEW user_stats AS
    SELECT u.id, u.name,
        COUNT(o.id) AS order_count,
        COALESCE(SUM(o.total), 0) AS total_spent
    FROM users u
    LEFT JOIN orders o ON u.id = o.user_id
    GROUP BY u.id;

-- Refresh materialized view
REFRESH MATERIALIZED VIEW CONCURRENTLY user_stats;
-- CONCURRENTLY requires a UNIQUE index on the view

Regular View

  • Always fresh data
  • Query runs each time
  • No storage cost
  • Can be slow for complex queries

Materialized View

  • Cached / snapshot data
  • Must REFRESH manually
  • Uses disk storage
  • Instant reads โ€” great for dashboards
๐Ÿ”ง

Functions & Procedures

-- Function (returns a value)
CREATE OR REPLACE FUNCTION get_user_total(uid INT)
RETURNS DECIMAL AS $$
BEGIN
    RETURN (
        SELECT COALESCE(SUM(total), 0)
        FROM orders WHERE user_id = uid
    );
END;
$$ LANGUAGE plpgsql;

-- Use it
SELECT name, get_user_total(id) FROM users;

-- Procedure (no return value, can manage transactions)
CREATE OR REPLACE PROCEDURE transfer_funds(
    from_id INT, to_id INT, amount DECIMAL
) AS $$
BEGIN
    UPDATE accounts SET balance = balance - amount WHERE id = from_id;
    UPDATE accounts SET balance = balance + amount WHERE id = to_id;
    COMMIT;
END;
$$ LANGUAGE plpgsql;

-- Call procedure
CALL transfer_funds(1, 2, 50.00);
๐Ÿš€

Performance Tuning

EXPLAIN ANALYZE

-- Show query plan with actual execution time
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id)
FROM users u
JOIN orders o ON u.id = o.user_id
GROUP BY u.name;

-- Key things to look for:
-- Seq Scan โ†’ consider adding an index
-- Nested Loop with high rows โ†’ may need different join strategy
-- Sort โ†’ add index for ORDER BY columns
-- actual time vs planned time discrepancy โ†’ stale stats

-- Update statistics
ANALYZE users;
ANALYZE;  -- whole database

Performance Tips

๐Ÿ› ๏ธ

Administration

-- Database size
SELECT pg_size_pretty(pg_database_size('mydb'));

-- Table sizes
SELECT tablename,
    pg_size_pretty(pg_total_relation_size(tablename::regclass)) AS total_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(tablename::regclass) DESC;

-- Active queries
SELECT pid, query, state, now() - query_start AS duration
FROM pg_stat_activity
WHERE state != 'idle' ORDER BY duration DESC;

-- Kill a slow query
SELECT pg_terminate_backend(pid);

-- Backup & Restore
-- pg_dump -U postgres -d mydb -F c -f backup.dump
-- pg_restore -U postgres -d mydb backup.dump

-- Roles & Permissions
CREATE ROLE readonly LOGIN PASSWORD 'secret';
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO readonly;