Database Optimization Techniques

Essential techniques for optimizing database performance and query efficiency.

Indexing Strategies

Proper indexing is crucial for query performance:

-- Create single column index
-- (speeds up equality/range lookups on email; adds write overhead per index)
CREATE INDEX idx_users_email ON users(email);

-- Create composite index
-- (column order matters: this serves queries filtering on user_id alone,
-- or on user_id AND created_at — but not on created_at alone)
CREATE INDEX idx_orders_user_date ON orders(user_id, created_at);

-- Create unique index
-- (also enforces uniqueness as a constraint, not just a lookup aid)
CREATE UNIQUE INDEX idx_users_username ON users(username);

-- Create partial index (PostgreSQL)
-- (smaller and cheaper to maintain than a full index; used only when the
-- query's WHERE clause implies active = true)
CREATE INDEX idx_active_users ON users(email) WHERE active = true;

-- View existing indexes
SELECT * FROM pg_indexes WHERE tablename = 'users';

Query Optimization

Write efficient queries:

-- Bad: Using SELECT *
SELECT * FROM users WHERE id = 1;

-- Good: Select only needed columns
SELECT id, name, email FROM users WHERE id = 1;

-- Bad: Using OR with different columns
-- (the planner often cannot combine indexes on category and price efficiently)
SELECT id, name, category, price FROM products WHERE category = 'electronics' OR price < 100;

-- Good: Use UNION for OR conditions on different columns
-- (each branch can use its own index; UNION deduplicates rows that match
-- both conditions — use UNION ALL only when the branches cannot overlap)
SELECT id, name, category, price FROM products WHERE category = 'electronics'
UNION
SELECT id, name, category, price FROM products WHERE price < 100;

-- Use EXPLAIN ANALYZE to see the actual plan and timings for a query
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;

Avoiding N+1 Queries

Use eager loading to prevent N+1 problems:

# Bad: N+1 query problem — one query for the users, then one more
# query per user when .orders is lazily loaded inside the loop.
users = User.query.all()
for user in users:
    print(user.orders)  # Separate query for each user

# Good: Eager loading — joinedload fetches users and their orders
# in a single query, so the loop triggers no further SQL.
from sqlalchemy.orm import joinedload

users = User.query.options(joinedload(User.orders)).all()
for user in users:
    print(user.orders)  # No additional queries

Connection Pooling

Reuse database connections:

// Node.js connection pooling with pg's built-in Pool.
// A pool keeps a bounded set of open connections and hands them out on
// demand, avoiding the cost of a new TCP/auth handshake per query.
const { Pool } = require('pg');

const pool = new Pool({
  host: 'localhost',
  database: 'mydb',
  max: 20, // Maximum connections
  idleTimeoutMillis: 30000,
  connectionTimeoutMillis: 2000,
});

// Fetch a single user row by id, borrowing a connection from the pool.
// The finally block guarantees the connection is returned even if the
// query throws.
async function getUser(id) {
  const conn = await pool.connect();
  try {
    const res = await conn.query('SELECT * FROM users WHERE id = $1', [id]);
    return res.rows[0];
  } finally {
    conn.release();
  }
}

Caching Strategies

Implement caching to reduce database load:

import redis
import json

redis_client = redis.Redis(host='localhost', port=6379, db=0)


def get_user(user_id):
    """Return the user as a plain dict, serving from Redis when possible.

    Fixes over the original example:
    - valid Python indentation (the scraped version was flattened),
    - a consistent return type: the original returned a dict on a cache
      hit but an ORM object on a miss — now both paths return a dict,
    - a missing user returns None instead of raising AttributeError
      on ``user.to_dict()``.
    """
    # Try cache first
    cache_key = f'user:{user_id}'
    cached = redis_client.get(cache_key)
    if cached:
        return json.loads(cached)

    # Query database if not in cache.
    # NOTE(review): assumes module-level `db` session and `User` model
    # with a to_dict() method exist elsewhere in the app.
    user = db.query(User).filter(User.id == user_id).first()
    if user is None:
        # Deliberately do not cache misses; a later insert would
        # otherwise be invisible for the TTL window.
        return None

    # Store in cache for 1 hour
    data = user.to_dict()
    redis_client.setex(cache_key, 3600, json.dumps(data))
    return data

Batch Operations

Process multiple records efficiently:

-- Bad: Multiple individual inserts
-- (each statement pays its own round trip, parse, and commit overhead)
INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com');
INSERT INTO users (name, email) VALUES ('Bob', 'bob@example.com');
INSERT INTO users (name, email) VALUES ('Charlie', 'charlie@example.com');

-- Good: Batch insert
-- (one statement, one round trip; all rows succeed or fail together)
INSERT INTO users (name, email) VALUES
('Alice', 'alice@example.com'),
('Bob', 'bob@example.com'),
('Charlie', 'charlie@example.com');

-- Batch update
-- (a single set-based UPDATE instead of one UPDATE per id)
UPDATE users
SET status = 'active'
WHERE id IN (1, 2, 3, 4, 5);

Partitioning

Split large tables for better performance:

-- Create partitioned table (PostgreSQL)
-- (rows are routed to a child partition by created_at; queries that
-- filter on created_at can skip irrelevant partitions entirely)
CREATE TABLE orders (
id SERIAL,
user_id INTEGER,
created_at TIMESTAMP,
total DECIMAL
) PARTITION BY RANGE (created_at);

-- Create partitions
-- (range bounds are half-open: FROM is inclusive, TO is exclusive,
-- so '2025-01-01' belongs to orders_2025, not orders_2024)
CREATE TABLE orders_2024 PARTITION OF orders
FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');

CREATE TABLE orders_2025 PARTITION OF orders
FOR VALUES FROM ('2025-01-01') TO ('2026-01-01');

Monitoring and Analysis

Track query performance:

-- Enable query logging (PostgreSQL)
ALTER SYSTEM SET log_min_duration_statement = 1000; -- Log queries > 1s
-- ALTER SYSTEM only writes the setting; reload the config to apply it
SELECT pg_reload_conf();

-- Find slow queries (requires the pg_stat_statements extension, and
-- 'pg_stat_statements' in shared_preload_libraries)
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- Columns renamed in PostgreSQL 13: on version 12 and earlier use
-- total_time / mean_time instead of total_exec_time / mean_exec_time.
SELECT query, calls, total_exec_time, mean_exec_time
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;

-- Check table sizes
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;

Optimize your database for better application performance!