Schemas, queries, joins, indexes, transactions, window functions, JSON, performance tuning & more
-- Database / SQL

-- Connect
psql -h localhost -U postgres -d mydb

-- Create / Drop database
CREATE DATABASE mydb;
DROP DATABASE mydb;

-- List databases & tables (psql shortcuts)
\l        -- list databases
\c mydb   -- connect to database
\dt       -- list tables
\d users  -- describe table structure
\di       -- list indexes
\df       -- list functions
\du       -- list roles/users
\timing   -- toggle query timing

-- Table with common column types and constraints
CREATE TABLE users (
  id         SERIAL PRIMARY KEY,
  email      VARCHAR(255) UNIQUE NOT NULL,
  name       VARCHAR(100) NOT NULL,
  age        INT CHECK (age >= 0 AND age <= 150),    -- guard against impossible values
  role       VARCHAR(20) DEFAULT 'user',
  created_at TIMESTAMPTZ DEFAULT NOW()               -- TIMESTAMPTZ: prefer over naive TIMESTAMP
);
CREATE TABLE orders (
  id         SERIAL PRIMARY KEY,
  user_id    INT REFERENCES users(id) ON DELETE CASCADE,  -- delete a user's orders with the user
  total      DECIMAL(10,2) NOT NULL,                      -- exact numeric: never FLOAT for money
  status     VARCHAR(20) DEFAULT 'pending',
  ordered_at TIMESTAMPTZ DEFAULT NOW()
);

-- Evolving a schema with ALTER TABLE
ALTER TABLE users ADD COLUMN phone VARCHAR(20);
ALTER TABLE users DROP COLUMN phone;
ALTER TABLE users ALTER COLUMN name SET NOT NULL;
ALTER TABLE users RENAME COLUMN name TO full_name;
ALTER TABLE users ADD CONSTRAINT unique_email UNIQUE(email);  -- named constraint: greppable in errors

| Type | Description | Example |
|---|---|---|
| SERIAL / BIGSERIAL | Auto-increment integer | Primary keys |
| UUID | Universally unique identifier | gen_random_uuid() |
| VARCHAR(n) | Variable-length string up to n chars | Names, emails |
| TEXT | Unlimited-length string | Descriptions, content |
| INT / BIGINT | 32-bit / 64-bit integer | Counts, foreign keys |
| DECIMAL(p,s) | Exact numeric with precision | Money, prices |
| BOOLEAN | true / false | Flags |
| TIMESTAMPTZ | Timestamp with timezone | Always use this over TIMESTAMP |
| JSONB | Binary JSON (indexable, faster) | Flexible schema data |
| ARRAY | Array of any type | INT[], TEXT[] |
-- INSERT
INSERT INTO users (email, name, age) VALUES
  ('alice@example.com', 'Alice', 30),
  ('bob@example.com', 'Bob', 25);

-- INSERT with conflict handling (upsert) -- race-free, unlike SELECT-then-INSERT
INSERT INTO users (email, name)
VALUES ('alice@example.com', 'Alice Updated')
ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name;

-- SELECT
SELECT * FROM users WHERE age > 25 ORDER BY name LIMIT 10;
SELECT DISTINCT role FROM users;
SELECT * FROM users WHERE name ILIKE '%alice%'; -- case-insensitive (leading % defeats indexes)
SELECT * FROM users WHERE role IN ('admin', 'moderator');
-- Half-open range: BETWEEN is closed on both ends, so on timestamps it would
-- miss everything after midnight on the last day
SELECT * FROM users
WHERE created_at >= '2024-01-01' AND created_at < '2025-01-01';

-- UPDATE (always with WHERE unless you really mean the whole table)
UPDATE users SET role = 'admin' WHERE email = 'alice@example.com';

-- DELETE
DELETE FROM users WHERE age < 18;
TRUNCATE TABLE users RESTART IDENTITY CASCADE; -- fast delete all

-- RETURNING (read back affected rows without a follow-up SELECT)
INSERT INTO users (email, name) VALUES ('c@d.com', 'C') RETURNING id, email;
DELETE FROM users WHERE id = 5 RETURNING *;

-- INNER JOIN (only matching rows)
SELECT u.name, o.total
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- LEFT JOIN (all users, even without orders)
SELECT u.name, COALESCE(SUM(o.total), 0) AS total_spent  -- SUM over zero rows is NULL
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.name;

-- CROSS JOIN (every combination -- cartesian product)
SELECT * FROM colors CROSS JOIN sizes;

-- SELF JOIN (employees and their managers)
SELECT e.name AS employee, m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;

-- LATERAL JOIN (correlated subquery evaluated once per outer row)
SELECT u.name, latest.total
FROM users u
LEFT JOIN LATERAL (
  SELECT total FROM orders WHERE user_id = u.id
  ORDER BY ordered_at DESC LIMIT 1
) latest ON TRUE;

-- Basic aggregates
SELECT
  COUNT(*) AS total_users,              -- counts rows; COUNT(col) skips NULLs
  COUNT(DISTINCT role) AS unique_roles,
  AVG(age) AS avg_age,                  -- aggregates ignore NULL inputs
  MIN(age) AS youngest,
  MAX(age) AS oldest,
  SUM(age) AS age_sum
FROM users;

-- GROUP BY with HAVING (WHERE filters rows before grouping, HAVING filters groups after)
SELECT role, COUNT(*) AS cnt
FROM users
GROUP BY role
HAVING COUNT(*) > 5
ORDER BY cnt DESC;

-- GROUPING SETS (multiple groupings in one query)
-- NOTE(review): the users table defined above has no status column --
-- this example probably meant orders; verify against the intended schema
SELECT role, status, COUNT(*)
FROM users
GROUP BY GROUPING SETS ((role), (status), ());

-- String aggregation
SELECT role, STRING_AGG(name, ', ' ORDER BY name) AS members
FROM users GROUP BY role;

-- Array aggregation
SELECT role, ARRAY_AGG(name) AS member_list
FROM users GROUP BY role;

Window functions perform calculations across a set of rows related to the
current row -- without collapsing rows like GROUP BY.
-- ROW_NUMBER, RANK, DENSE_RANK
SELECT name, role, age,
  ROW_NUMBER() OVER (PARTITION BY role ORDER BY age DESC) AS row_num,
  RANK()       OVER (ORDER BY age DESC) AS rnk,
  DENSE_RANK() OVER (ORDER BY age DESC) AS dense_rnk
FROM users;

-- LAG / LEAD (previous/next row value)
SELECT ordered_at, total,
  total - LAG(total) OVER (ORDER BY ordered_at) AS change
FROM orders;

-- Running total
-- Explicit ROWS frame: the default RANGE frame lumps together all rows with
-- equal ordered_at values, producing surprising totals on ties
SELECT ordered_at, total,
  SUM(total) OVER (ORDER BY ordered_at
                   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total
FROM orders;

-- NTILE (divide into N buckets)
SELECT name, age,
  NTILE(4) OVER (ORDER BY age) AS quartile
FROM users;

-- Percent rank & percentile
SELECT name, age,
  PERCENT_RANK() OVER (ORDER BY age) AS pct_rank
FROM users;

-- ROW_NUMBER: always unique                  (1, 2, 3, 4, 5)
-- RANK:       ties share rank, gaps after    (1, 2, 2, 4, 5)
-- DENSE_RANK: ties share rank, no gaps       (1, 2, 2, 3, 4)

-- Common Table Expression (WITH clause)
WITH active_users AS (
  -- users that have not been deactivated
  SELECT * FROM users WHERE role != 'inactive'
),
user_orders AS (
  -- lifetime spend per user
  SELECT user_id, SUM(total) AS total_spent
  FROM orders GROUP BY user_id
)
SELECT a.name, COALESCE(uo.total_spent, 0) AS spent
FROM active_users a
LEFT JOIN user_orders uo ON a.id = uo.user_id;

-- Recursive CTE (org hierarchy)
WITH RECURSIVE org_tree AS (
  -- Base case: top-level managers
  SELECT id, name, manager_id, 1 AS depth
  FROM employees WHERE manager_id IS NULL
  UNION ALL
  -- Recursive case: find subordinates
  SELECT e.id, e.name, e.manager_id, t.depth + 1
  FROM employees e
  JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree ORDER BY depth, name;

-- Subquery in WHERE
-- (beware NOT IN when the subquery column is nullable -- use NOT EXISTS instead)
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE total > 100);

-- EXISTS (stops at the first match; often better plans than IN on large sets)
SELECT * FROM users u
WHERE EXISTS (
  SELECT 1 FROM orders o WHERE o.user_id = u.id AND o.total > 100
);

Indexes speed up reads but slow down writes. Choose wisely based on query patterns.
-- B-tree (default, most common)
CREATE INDEX idx_users_email ON users(email);

-- Unique index
CREATE UNIQUE INDEX idx_users_email_unique ON users(email);

-- Composite index (multi-column; column order matters for prefix lookups)
CREATE INDEX idx_orders_user_date ON orders(user_id, ordered_at DESC);

-- Partial index (only index a subset of rows)
CREATE INDEX idx_active_users ON users(email) WHERE role = 'active';

-- GIN index (for JSONB, arrays, full-text search)
CREATE INDEX idx_users_metadata ON users USING GIN(metadata);

-- GiST index (for geometric, range, full-text)
CREATE INDEX idx_locations_geo ON locations USING GIST(coordinates);

-- Expression index (lets WHERE LOWER(email) = ... use an index)
CREATE INDEX idx_users_lower_email ON users(LOWER(email));

-- Concurrent index creation (non-blocking; cannot run inside a transaction)
CREATE INDEX CONCURRENTLY idx_name ON users(name);

-- Drop index
DROP INDEX idx_users_email;

| Index Type | Best For | Example |
|---|---|---|
| B-tree | =, <, >, BETWEEN, LIKE 'abc%' | Most columns |
| Hash | Equality (=) only | Exact lookups |
| GIN | JSONB, arrays, full-text, tsvector | metadata @> '{"a":1}' |
| GiST | Geometry, ranges, nearest-neighbor | PostGIS, range queries |
| BRIN | Very large, naturally ordered tables | Time-series data (created_at) |
BEGIN; -- or START TRANSACTION
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
-- If everything is OK:
COMMIT;
-- If something went wrong:
ROLLBACK;

-- Savepoints (partial rollback)
BEGIN;
INSERT INTO users (...) VALUES (...);
SAVEPOINT sp1;
INSERT INTO orders (...) VALUES (...); -- this might fail
ROLLBACK TO sp1; -- undo only the order insert
COMMIT; -- user insert is still committed

| Level | Dirty Read | Non-Repeatable Read | Phantom Read | Use Case |
|---|---|---|---|---|
| Read Uncommitted | Possible | Possible | Possible | Almost never used in Postgres |
| Read Committed | No | Possible | Possible | Default in PostgreSQL |
| Repeatable Read | No | No | No* | Financial reports, analytics |
| Serializable | No | No | No | Strictest — as if sequential |
* PostgreSQL's Repeatable Read is based on MVCC snapshots and prevents phantom reads too.
Use JSONB over JSON — it's binary, faster, and indexable.
-- Create table with JSONB
CREATE TABLE events (
  id   SERIAL PRIMARY KEY,
  data JSONB NOT NULL
);

-- Insert JSON
INSERT INTO events (data) VALUES
  ('{"type": "click", "page": "/home", "user_id": 42}'::jsonb);

-- Access JSON fields
SELECT
  data->'type'  AS type_json,           -- returns JSONB: "click"
  data->>'type' AS type_text,           -- returns TEXT: click
  data->'user'->'name' AS nested_json,  -- nested access
  data#>>'{user,name}' AS nested_path   -- path access as text
FROM events;

-- Filter by JSON field
SELECT * FROM events WHERE data->>'type' = 'click';

-- Containment operator (uses GIN index!)
SELECT * FROM events WHERE data @> '{"type": "click"}'::jsonb;

-- Update JSON fields (no WHERE here: each statement rewrites every row)
UPDATE events SET data = data || '{"processed": true}'::jsonb;
UPDATE events SET data = data - 'temporary_field'; -- remove key
UPDATE events SET data = jsonb_set(data, '{page}', '"/"');

-- Aggregate to JSON
SELECT jsonb_agg(jsonb_build_object('name', name, 'age', age))
FROM users;

-- Regular view (virtual -- query runs every time)
CREATE VIEW active_users AS
SELECT id, name, email FROM users WHERE role = 'active';

SELECT * FROM active_users; -- uses the view

-- Materialized view (cached -- must refresh manually)
CREATE MATERIALIZED VIEW user_stats AS
SELECT u.id, u.name,       -- u.name allowed: functionally dependent on PK u.id
  COUNT(o.id) AS order_count,
  COALESCE(SUM(o.total), 0) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id;

-- Refresh materialized view
REFRESH MATERIALIZED VIEW CONCURRENTLY user_stats;
-- CONCURRENTLY requires a UNIQUE index on the view

-- Function (returns a value)
CREATE OR REPLACE FUNCTION get_user_total(uid INT)
RETURNS DECIMAL AS $$
BEGIN
  RETURN (
    SELECT COALESCE(SUM(total), 0)  -- SUM over zero rows is NULL, so coalesce to 0
    FROM orders WHERE user_id = uid
  );
END;
$$ LANGUAGE plpgsql;

-- Use it
SELECT name, get_user_total(id) FROM users;

-- Procedure (no return value, can manage transactions)
CREATE OR REPLACE PROCEDURE transfer_funds(
  from_id INT, to_id INT, amount DECIMAL
) AS $$
BEGIN
  UPDATE accounts SET balance = balance - amount WHERE id = from_id;
  UPDATE accounts SET balance = balance + amount WHERE id = to_id;
  COMMIT;  -- valid in procedures, but only when CALL runs outside an explicit transaction
END;
$$ LANGUAGE plpgsql;

-- Call procedure
CALL transfer_funds(1, 2, 50.00);

-- Show query plan with actual execution time
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id)
FROM users u
JOIN orders o ON u.id = o.user_id
GROUP BY u.name;

-- Key things to look for:
--   Seq Scan                        -> consider adding an index
--   Nested Loop with high row count -> may need a different join strategy
--   Sort                            -> add an index for ORDER BY columns
--   actual vs estimated rows差 large -> stale statistics; run ANALYZE

-- Update statistics
ANALYZE users;
ANALYZE; -- whole database

-- Performance quick wins:
--   Avoid SELECT * -- only fetch the columns you need
--   Paginate with LIMIT/OFFSET, or better, keyset pagination
--   Run VACUUM ANALYZE to reclaim space and update stats
--   Use COPY for bulk loading instead of many INSERTs

-- Database size
SELECT pg_size_pretty(pg_database_size('mydb'));

-- Table sizes
-- quote_ident handles mixed-case or reserved-word table names that a bare
-- tablename::regclass cast would fail on (regclass resolves via search_path)
SELECT tablename,
  pg_size_pretty(pg_total_relation_size(
    quote_ident(schemaname) || '.' || quote_ident(tablename))) AS total_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) DESC;

-- Active queries
SELECT pid, query, state, now() - query_start AS duration
FROM pg_stat_activity
WHERE state != 'idle' ORDER BY duration DESC;

-- Kill a slow query (substitute the pid found in pg_stat_activity)
SELECT pg_terminate_backend(pid);

-- Backup & Restore (shell commands, not SQL)
-- pg_dump -U postgres -d mydb -F c -f backup.dump
-- pg_restore -U postgres -d mydb backup.dump

-- Roles & Permissions
CREATE ROLE readonly LOGIN PASSWORD 'secret';
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly;
-- Default privileges cover tables created AFTER the grant above
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO readonly;