SQL Fundamentals
SQL (Structured Query Language) is the standard language for interacting with relational databases. It’s declarative—you specify what you want, not how to get it.
Common types across systems (syntax varies by RDBMS):
- Numeric:
INT,BIGINT,SMALLINT,DECIMAL(p,s),NUMERIC,FLOAT,REAL,DOUBLE - Character:
CHAR(n),VARCHAR(n),TEXT - Date/Time:
DATE,TIME,DATETIME,TIMESTAMP,INTERVAL - Binary:
BLOB,BYTEA,VARBINARY - Boolean:
BOOLEAN (some systems use TINYINT) - Other:
JSON,XML,UUID,ENUM,ARRAY (PostgreSQL)
Creating Tables
1CREATE TABLE employees (
2 employee_id INT PRIMARY KEY AUTO_INCREMENT,
3 first_name VARCHAR(50) NOT NULL,
4 last_name VARCHAR(50) NOT NULL,
5 email VARCHAR(100) UNIQUE,
6 hire_date DATE,
7 salary DECIMAL(10, 2),
8 department_id INT,
9 FOREIGN KEY (department_id) REFERENCES departments(department_id)
10);Constraints:
NOT NULLUNIQUEPRIMARY KEYFOREIGN KEYCHECKDEFAULT
Inserting Data
1-- Single row
2INSERT INTO employees (first_name, last_name, email, hire_date, salary)
3VALUES ('John', 'Doe', 'john@example.com', '2024-01-15', 75000);
4
5-- Multiple rows
6INSERT INTO employees (first_name, last_name, email)
7VALUES
8 ('Jane', 'Smith', 'jane@example.com'),
9 ('Bob', 'Johnson', 'bob@example.com');
10
11-- From another table
12INSERT INTO archived_employees
13SELECT * FROM employees WHERE hire_date < '2020-01-01';Select Queries
1-- Basic select
2SELECT first_name, last_name, salary FROM employees;
3
4-- All columns
5SELECT * FROM employees;
6
7-- With WHERE clause
8SELECT * FROM employees WHERE salary > 60000;
9
10-- Multiple conditions
11SELECT * FROM employees
12WHERE salary > 60000 AND department_id = 3;
13
14-- Pattern matching
15SELECT * FROM employees WHERE last_name LIKE 'S%';
16
17-- IN operator
18SELECT * FROM employees WHERE department_id IN (1, 3, 5);
19
20-- BETWEEN
21SELECT * FROM employees WHERE salary BETWEEN 50000 AND 80000;
22
23-- IS NULL / IS NOT NULL
24SELECT * FROM employees WHERE email IS NOT NULL;
25
26-- DISTINCT
27SELECT DISTINCT department_id FROM employees;
28
29-- ORDER BY
30SELECT * FROM employees ORDER BY salary DESC, last_name ASC;
31
32-- LIMIT (MySQL/PostgreSQL) or TOP (SQL Server)
33SELECT * FROM employees ORDER BY salary DESC LIMIT 10;Aggregate Functions
1SELECT
2 COUNT(*) as total_employees,
3 AVG(salary) as avg_salary,
4 MIN(salary) as min_salary,
5 MAX(salary) as max_salary,
6 SUM(salary) as total_payroll
7FROM employees;GROUP BY and HAVING
1-- Group by department
2SELECT department_id, COUNT(*) as employee_count, AVG(salary) as avg_salary
3FROM employees
4GROUP BY department_id;
5
6-- HAVING filters groups (WHERE filters rows)
7SELECT department_id, AVG(salary) as avg_salary
8FROM employees
9GROUP BY department_id
10HAVING AVG(salary) > 70000;Key difference: WHERE filters before grouping, HAVING filters after grouping.
INNER JOIN
Returns rows with matches in both tables
1SELECT e.first_name, e.last_name, d.department_name
2FROM employees e
3INNER JOIN departments d ON e.department_id = d.department_id;or
1SELECT e.first_name, e.last_name, d.department_name
2FROM employees e, departments d
3WHERE e.department_id = d.department_idLEFT JOIN
Returns all rows from left table, matching rows from right (NULL if no match).
1SELECT e.first_name, e.last_name, d.department_name
2FROM employees e
3LEFT JOIN departments d ON e.department_id = d.department_id;RIGHT JOIN
Returns all rows from right table, matching rows from left.
FULL OUTER JOIN
Returns all rows from both tables (not supported in MySQL).
CROSS JOIN
Cartesian product of both tables.
1SELECT * FROM table1 CROSS JOIN table2;SELF JOIN
Joining a table to itself
1SELECT e1.first_name, e2.first_name as manager_name
2FROM employees e1
3LEFT JOIN employees e2 ON e1.manager_id = e2.employee_id;Subqueries
1-- In WHERE clause
2SELECT first_name, last_name
3FROM employees
4WHERE salary > (SELECT AVG(salary) FROM employees);
5
6-- In FROM clause (derived table)
7SELECT dept_stats.department_id, dept_stats.avg_salary
8FROM (
9 SELECT department_id, AVG(salary) as avg_salary
10 FROM employees
11 GROUP BY department_id
12) as dept_stats
13WHERE dept_stats.avg_salary > 70000;
14
15-- Correlated subquery
16SELECT e1.first_name, e1.salary
17FROM employees e1
18WHERE e1.salary > (
19 SELECT AVG(e2.salary)
20 FROM employees e2
21 WHERE e2.department_id = e1.department_id
22);
23
24-- EXISTS
25SELECT d.department_name
26FROM departments d
27WHERE EXISTS (
28 SELECT 1 FROM employees e WHERE e.department_id = d.department_id
29);Set Operations
1-- UNION (removes duplicates)
2SELECT first_name FROM employees
3UNION
4SELECT first_name FROM contractors;
5
6-- UNION ALL (keeps duplicates)
7SELECT first_name FROM employees
8UNION ALL
9SELECT first_name FROM contractors;
10
11-- INTERSECT
12SELECT first_name FROM employees
13INTERSECT
14SELECT first_name FROM contractors;
15
16-- EXCEPT (or MINUS in Oracle)
17SELECT first_name FROM employees
18EXCEPT
19SELECT first_name FROM contractors;UPDATE
1-- Update single column
2UPDATE employees SET salary = 80000 WHERE employee_id = 5;
3
4-- Update multiple columns
5UPDATE employees
6SET salary = salary * 1.1, last_updated = NOW()
7WHERE department_id = 3;
8
9-- Update with JOIN (syntax varies)
10UPDATE employees e
11INNER JOIN departments d ON e.department_id = d.department_id
12SET e.bonus = d.bonus_multiplier * e.salary
13WHERE d.department_name = 'Sales';DELETE
1-- Delete specific rows
2DELETE FROM employees WHERE employee_id = 5;
3
4-- Delete based on condition
5DELETE FROM employees WHERE hire_date < '2015-01-01';
6
7-- Delete all rows (slower, logged)
8DELETE FROM employees;
9
10-- TRUNCATE (faster, minimal logging, resets auto-increment)
11TRUNCATE TABLE employees;Transactions
1START TRANSACTION; -- or BEGIN
2
3UPDATE accounts SET balance = balance - 100 WHERE account_id = 1;
4UPDATE accounts SET balance = balance + 100 WHERE account_id = 2;
5
6-- If everything is correct
7COMMIT;
8
9-- If there's an error
10ROLLBACK;
11
12-- Savepoints
13START TRANSACTION;
14UPDATE accounts SET balance = balance - 100 WHERE account_id = 1;
15SAVEPOINT sp1;
16UPDATE accounts SET balance = balance + 100 WHERE account_id = 2;
17-- If second update fails
18ROLLBACK TO sp1;
19COMMIT;Window Functions
Perform calculations across rows related to the current row without collapsing results.
1-- Row number
2SELECT
3 first_name,
4 salary,
5 ROW_NUMBER() OVER (ORDER BY salary DESC) as rank
6FROM employees;
7
8-- Partition by department
9SELECT
10 first_name,
11 department_id,
12 salary,
13 AVG(salary) OVER (PARTITION BY department_id) as dept_avg
14FROM employees;
15
16-- Running total
17SELECT
18 order_date,
19 amount,
20 SUM(amount) OVER (ORDER BY order_date) as running_total
21FROM orders;
22
23-- Ranking functions
24SELECT
25 first_name,
26 salary,
27 ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num,
28 RANK() OVER (ORDER BY salary DESC) as rank,
29 DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank,
30 NTILE(4) OVER (ORDER BY salary DESC) as quartile
31FROM employees;
32
33-- LAG and LEAD
34SELECT
35 order_date,
36 amount,
37 LAG(amount, 1) OVER (ORDER BY order_date) as previous_amount,
38 LEAD(amount, 1) OVER (ORDER BY order_date) as next_amount
39FROM orders;Common Table Expressions (CTEs)
1-- Basic CTE
2WITH high_earners AS (
3 SELECT * FROM employees WHERE salary > 100000
4)
5SELECT * FROM high_earners WHERE department_id = 3;
6
7-- Multiple CTEs
8WITH
9dept_totals AS (
10 SELECT department_id, SUM(salary) as total_salary
11 FROM employees
12 GROUP BY department_id
13),
14dept_averages AS (
15 SELECT department_id, AVG(salary) as avg_salary
16 FROM employees
17 GROUP BY department_id
18)
19SELECT
20 t.department_id,
21 t.total_salary,
22 a.avg_salary
23FROM dept_totals t
24JOIN dept_averages a ON t.department_id = a.department_id;
25
26-- Recursive CTE (for hierarchies)
27WITH RECURSIVE employee_hierarchy AS (
28 -- Anchor member
29 SELECT employee_id, first_name, manager_id, 1 as level
30 FROM employees
31 WHERE manager_id IS NULL
32
33 UNION ALL
34
35 -- Recursive member
36 SELECT e.employee_id, e.first_name, e.manager_id, eh.level + 1
37 FROM employees e
38 INNER JOIN employee_hierarchy eh ON e.manager_id = eh.employee_id
39)
40SELECT * FROM employee_hierarchy;CASE Expressions
1-- Simple CASE
2SELECT
3 first_name,
4 salary,
5 CASE department_id
6 WHEN 1 THEN 'Sales'
7 WHEN 2 THEN 'Engineering'
8 WHEN 3 THEN 'HR'
9 ELSE 'Other'
10 END as department_name
11FROM employees;
12
13-- Searched CASE
14SELECT
15 first_name,
16 salary,
17 CASE
18 WHEN salary < 50000 THEN 'Low'
19 WHEN salary BETWEEN 50000 AND 100000 THEN 'Medium'
20 ELSE 'High'
21 END as salary_bracket
22FROM employees;Views
1-- Create view
2CREATE VIEW high_salary_employees AS
3SELECT first_name, last_name, salary, department_id
4FROM employees
5WHERE salary > 80000;
6
7-- Use view
8SELECT * FROM high_salary_employees WHERE department_id = 3;
9
10-- Updatable views (restrictions apply)
11CREATE VIEW engineering_staff AS
12SELECT employee_id, first_name, last_name, salary
13FROM employees
14WHERE department_id = 2;
15
16-- Update through view
17UPDATE engineering_staff SET salary = salary * 1.1;
18
19-- Drop view
20DROP VIEW high_salary_employees;
21
22-- Materialized views (PostgreSQL)
23CREATE MATERIALIZED VIEW dept_summary AS
24SELECT department_id, COUNT(*) as employee_count, AVG(salary) as avg_salary
25FROM employees
26GROUP BY department_id;
27
28-- Refresh materialized view
29REFRESH MATERIALIZED VIEW dept_summary;Indexes
Improve query performance by creating data structures that allow faster lookups
1-- Create index
2CREATE INDEX idx_employee_lastname ON employees(last_name);
3
4-- Composite index
5CREATE INDEX idx_dept_salary ON employees(department_id, salary);
6
7-- Unique index
8CREATE UNIQUE INDEX idx_employee_email ON employees(email);
9
10-- Drop index
11DROP INDEX idx_employee_lastname ON employees;
12
13-- Full-text index (MySQL)
14CREATE FULLTEXT INDEX idx_description ON products(description);
15
16-- Partial index (PostgreSQL)
17CREATE INDEX idx_active_employees ON employees(last_name)
18WHERE active = true;When to use indexes:
- Columns frequently used in WHERE clauses
- Foreign keys
- Columns used in JOIN conditions
- Columns used in ORDER BY
Trade-offs:
- Indexes speed up reads but slow down writes
- Indexes consume storage space
- Too many indexes can hurt performance
Stored Procedures
1-- MySQL/PostgreSQL syntax
2DELIMITER //
3CREATE PROCEDURE give_raise(IN emp_id INT, IN raise_pct DECIMAL(5,2))
4BEGIN
5 UPDATE employees
6 SET salary = salary * (1 + raise_pct / 100)
7 WHERE employee_id = emp_id;
8END //
9DELIMITER ;
10
11-- Call procedure
12CALL give_raise(5, 10);
13
14-- Procedure with OUT parameter
15DELIMITER //
16CREATE PROCEDURE get_employee_count(OUT emp_count INT)
17BEGIN
18 SELECT COUNT(*) INTO emp_count FROM employees;
19END //
20DELIMITER ;
21
22-- Use OUT parameter
23CALL get_employee_count(@count);
24SELECT @count;Triggers
Automatically execute code in response to events
1-- Before insert trigger
2CREATE TRIGGER before_employee_insert
3BEFORE INSERT ON employees
4FOR EACH ROW
5BEGIN
6 SET NEW.created_at = NOW();
7 SET NEW.email = LOWER(NEW.email);
8END;
9
10-- After update trigger
11CREATE TRIGGER after_salary_update
12AFTER UPDATE ON employees
13FOR EACH ROW
14BEGIN
15 IF NEW.salary != OLD.salary THEN
16 INSERT INTO salary_history (employee_id, old_salary, new_salary, changed_at)
17 VALUES (NEW.employee_id, OLD.salary, NEW.salary, NOW());
18 END IF;
19END;
20
21-- Drop trigger
22DROP TRIGGER before_employee_insert;Functions
User-defined functions return a single value
1-- Scalar function
2CREATE FUNCTION calculate_bonus(emp_salary DECIMAL(10,2))
3RETURNS DECIMAL(10,2)
4DETERMINISTIC
5BEGIN
6 RETURN emp_salary * 0.1;
7END;
8
9-- Use function
10SELECT first_name, salary, calculate_bonus(salary) as bonus
11FROM employees;
12
13-- Table-valued function (SQL Server)
14CREATE FUNCTION get_department_employees(@dept_id INT)
15RETURNS TABLE
16AS
17RETURN (
18 SELECT employee_id, first_name, last_name, salary
19 FROM employees
20 WHERE department_id = @dept_id
21);
22
23-- Use table function
24SELECT * FROM get_department_employees(3);Constraints
1-- Add constraint to existing table
2ALTER TABLE employees
3ADD CONSTRAINT chk_salary CHECK (salary > 0);
4
5ALTER TABLE employees
6ADD CONSTRAINT fk_department
7FOREIGN KEY (department_id) REFERENCES departments(department_id)
8ON DELETE CASCADE
9ON UPDATE CASCADE;
10
11-- Drop constraint
12ALTER TABLE employees
13DROP CONSTRAINT chk_salary;
14
15-- Check constraint
16CREATE TABLE orders (
17 order_id INT PRIMARY KEY,
18 order_date DATE,
19 ship_date DATE,
20 CONSTRAINT chk_dates CHECK (ship_date >= order_date)
21);Foreign key actions:
CASCADE: Propagate changes/deletes to child rowsSET NULL: Set foreign key to NULLSET DEFAULT: Set foreign key to default valueRESTRICT/NO ACTION: Prevent operation if child rows exist
Locking
1-- Explicit locking (MySQL)
2LOCK TABLES employees WRITE;
3-- Perform operations
4UNLOCK TABLES;
5
6-- Row-level locking
7SELECT * FROM employees WHERE employee_id = 5 FOR UPDATE;
8
9-- Pessimistic locking
10BEGIN;
11SELECT * FROM accounts WHERE account_id = 1 FOR UPDATE;
12-- Perform operations
13COMMIT;
14
15-- Optimistic locking (application-level with version column)
16UPDATE accounts
17SET balance = balance - 100, version = version + 1
18WHERE account_id = 1 AND version = @old_version;Query Optimization
EXPLAIN/EXPLAIN ANALYZE: Shows query execution plan.
1EXPLAIN SELECT * FROM employees WHERE salary > 50000;
2
3EXPLAIN ANALYZE SELECT e.*, d.department_name
4FROM employees e
5JOIN departments d ON e.department_id = d.department_id;Optimization techniques:
- Use indexes on columns in WHERE, JOIN, ORDER BY
- Avoid SELECT *, specify needed columns
- Use LIMIT when appropriate
- Avoid functions on indexed columns in WHERE (breaks index usage)
- Use EXISTS instead of IN for subqueries when checking existence
- Use JOIN instead of subqueries when possible
- Avoid LIKE with leading wildcard (’%value')
- Consider partitioning large tables
- Update statistics regularly
- Use appropriate data types
Partitioning
Splitting large tables into smaller, more manageable pieces
1-- Range partitioning (MySQL)
2CREATE TABLE sales (
3 sale_id INT,
4 sale_date DATE,
5 amount DECIMAL(10,2)
6)
7PARTITION BY RANGE (YEAR(sale_date)) (
8 PARTITION p2020 VALUES LESS THAN (2021),
9 PARTITION p2021 VALUES LESS THAN (2022),
10 PARTITION p2022 VALUES LESS THAN (2023),
11 PARTITION p_future VALUES LESS THAN MAXVALUE
12);
13
14-- List partitioning
15CREATE TABLE employees_partitioned (
16 employee_id INT,
17 first_name VARCHAR(50),
18 region VARCHAR(10)
19)
20PARTITION BY LIST (region) (
21 PARTITION p_west VALUES IN ('CA', 'OR', 'WA'),
22 PARTITION p_east VALUES IN ('NY', 'MA', 'CT'),
23 PARTITION p_other VALUES IN (DEFAULT)
24);
25
26-- Hash partitioning
27CREATE TABLE orders_partitioned (
28 order_id INT,
29 customer_id INT,
30 order_date DATE
31)
32PARTITION BY HASH(customer_id)
33PARTITIONS 4;