SQL Fundamentals

2025-11-05

SQL (Structured Query Language) is the standard language for interacting with relational databases. It’s declarative—you specify what you want, not how to get it.

Common types across systems (syntax varies by RDBMS):

Numeric:INT, BIGINT, SMALLINT, DECIMAL(p,s), NUMERIC, FLOAT, REAL, DOUBLE
Character:CHAR(n), VARCHAR(n), TEXT
Date/Time:DATE, TIME, DATETIME, TIMESTAMP, INTERVAL
Binary:BLOB, BYTEA, VARBINARY
Boolean:BOOLEAN (some systems use TINYINT)
Other:JSON, XML, UUID, ENUM, ARRAY (PostgreSQL)

Creating Tables

 1CREATE TABLE employees (
 2    employee_id INT PRIMARY KEY AUTO_INCREMENT,
 3    first_name VARCHAR(50) NOT NULL,
 4    last_name VARCHAR(50) NOT NULL,
 5    email VARCHAR(100) UNIQUE,
 6    hire_date DATE,
 7    salary DECIMAL(10, 2),
 8    department_id INT,
 9    FOREIGN KEY (department_id) REFERENCES departments(department_id)
10);

Constraints:

NOT NULL
UNIQUE
PRIMARY KEY
FOREIGN KEY
CHECK
DEFAULT

Inserting Data

 1-- Single row
 2INSERT INTO employees (first_name, last_name, email, hire_date, salary)
 3VALUES ('John', 'Doe', 'john@example.com', '2024-01-15', 75000);
 4
 5-- Multiple rows
 6INSERT INTO employees (first_name, last_name, email) 
 7VALUES 
 8    ('Jane', 'Smith', 'jane@example.com'),
 9    ('Bob', 'Johnson', 'bob@example.com');
10
11-- From another table
12INSERT INTO archived_employees
13SELECT * FROM employees WHERE hire_date < '2020-01-01';

Select Queries

 1-- Basic select
 2SELECT first_name, last_name, salary FROM employees;
 3
 4-- All columns
 5SELECT * FROM employees;
 6
 7-- With WHERE clause
 8SELECT * FROM employees WHERE salary > 60000;
 9
10-- Multiple conditions
11SELECT * FROM employees 
12WHERE salary > 60000 AND department_id = 3;
13
14-- Pattern matching
15SELECT * FROM employees WHERE last_name LIKE 'S%';
16
17-- IN operator
18SELECT * FROM employees WHERE department_id IN (1, 3, 5);
19
20-- BETWEEN
21SELECT * FROM employees WHERE salary BETWEEN 50000 AND 80000;
22
23-- IS NULL / IS NOT NULL
24SELECT * FROM employees WHERE email IS NOT NULL;
25
26-- DISTINCT
27SELECT DISTINCT department_id FROM employees;
28
29-- ORDER BY
30SELECT * FROM employees ORDER BY salary DESC, last_name ASC;
31
32-- LIMIT (MySQL/PostgreSQL) or TOP (SQL Server)
33SELECT * FROM employees ORDER BY salary DESC LIMIT 10;

Aggregate Functions

1SELECT 
2    COUNT(*) as total_employees,
3    AVG(salary) as avg_salary,
4    MIN(salary) as min_salary,
5    MAX(salary) as max_salary,
6    SUM(salary) as total_payroll
7FROM employees;

GROUP BY and HAVING

 1-- Group by department
 2SELECT department_id, COUNT(*) as employee_count, AVG(salary) as avg_salary
 3FROM employees
 4GROUP BY department_id;
 5
 6-- HAVING filters groups (WHERE filters rows)
 7SELECT department_id, AVG(salary) as avg_salary
 8FROM employees
 9GROUP BY department_id
10HAVING AVG(salary) > 70000;

Key difference: WHERE filters before grouping, HAVING filters after grouping.

INNER JOIN

Returns rows with matches in both tables

1SELECT e.first_name, e.last_name, d.department_name
2FROM employees e
3INNER JOIN departments d ON e.department_id = d.department_id;

1SELECT e.first_name, e.last_name, d.department_name
2FROM employees e, departments d
3WHERE e.department_id = d.department_id

LEFT JOIN

Returns all rows from left table, matching rows from right (NULL if no match).

1SELECT e.first_name, e.last_name, d.department_name
2FROM employees e
3LEFT JOIN departments d ON e.department_id = d.department_id;

RIGHT JOIN

Returns all rows from right table, matching rows from left.

FULL OUTER JOIN

Returns all rows from both tables (not supported in MySQL).

CROSS JOIN

Cartesian product of both tables.

1SELECT * FROM table1 CROSS JOIN table2;

SELF JOIN

Joining a table to itself

1SELECT e1.first_name, e2.first_name as manager_name
2FROM employees e1
3LEFT JOIN employees e2 ON e1.manager_id = e2.employee_id;

Subqueries

 1-- In WHERE clause
 2SELECT first_name, last_name
 3FROM employees
 4WHERE salary > (SELECT AVG(salary) FROM employees);
 5
 6-- In FROM clause (derived table)
 7SELECT dept_stats.department_id, dept_stats.avg_salary
 8FROM (
 9    SELECT department_id, AVG(salary) as avg_salary
10    FROM employees
11    GROUP BY department_id
12) as dept_stats
13WHERE dept_stats.avg_salary > 70000;
14
15-- Correlated subquery
16SELECT e1.first_name, e1.salary
17FROM employees e1
18WHERE e1.salary > (
19    SELECT AVG(e2.salary)
20    FROM employees e2
21    WHERE e2.department_id = e1.department_id
22);
23
24-- EXISTS
25SELECT d.department_name
26FROM departments d
27WHERE EXISTS (
28    SELECT 1 FROM employees e WHERE e.department_id = d.department_id
29);

Set Operations

 1-- UNION (removes duplicates)
 2SELECT first_name FROM employees
 3UNION
 4SELECT first_name FROM contractors;
 5
 6-- UNION ALL (keeps duplicates)
 7SELECT first_name FROM employees
 8UNION ALL
 9SELECT first_name FROM contractors;
10
11-- INTERSECT
12SELECT first_name FROM employees
13INTERSECT
14SELECT first_name FROM contractors;
15
16-- EXCEPT (or MINUS in Oracle)
17SELECT first_name FROM employees
18EXCEPT
19SELECT first_name FROM contractors;

UPDATE

 1-- Update single column
 2UPDATE employees SET salary = 80000 WHERE employee_id = 5;
 3
 4-- Update multiple columns
 5UPDATE employees 
 6SET salary = salary * 1.1, last_updated = NOW()
 7WHERE department_id = 3;
 8
 9-- Update with JOIN (syntax varies)
10UPDATE employees e
11INNER JOIN departments d ON e.department_id = d.department_id
12SET e.bonus = d.bonus_multiplier * e.salary
13WHERE d.department_name = 'Sales';

DELETE

 1-- Delete specific rows
 2DELETE FROM employees WHERE employee_id = 5;
 3
 4-- Delete based on condition
 5DELETE FROM employees WHERE hire_date < '2015-01-01';
 6
 7-- Delete all rows (slower, logged)
 8DELETE FROM employees;
 9
10-- TRUNCATE (faster, minimal logging, resets auto-increment)
11TRUNCATE TABLE employees;

Transactions

 1START TRANSACTION; -- or BEGIN
 2
 3UPDATE accounts SET balance = balance - 100 WHERE account_id = 1;
 4UPDATE accounts SET balance = balance + 100 WHERE account_id = 2;
 5
 6-- If everything is correct
 7COMMIT;
 8
 9-- If there's an error
10ROLLBACK;
11
12-- Savepoints
13START TRANSACTION;
14UPDATE accounts SET balance = balance - 100 WHERE account_id = 1;
15SAVEPOINT sp1;
16UPDATE accounts SET balance = balance + 100 WHERE account_id = 2;
17-- If second update fails
18ROLLBACK TO sp1;
19COMMIT;

Window Functions

Perform calculations across rows related to the current row without collapsing results.

 1-- Row number
 2SELECT 
 3    first_name,
 4    salary,
 5    ROW_NUMBER() OVER (ORDER BY salary DESC) as rank
 6FROM employees;
 7
 8-- Partition by department
 9SELECT 
10    first_name,
11    department_id,
12    salary,
13    AVG(salary) OVER (PARTITION BY department_id) as dept_avg
14FROM employees;
15
16-- Running total
17SELECT 
18    order_date,
19    amount,
20    SUM(amount) OVER (ORDER BY order_date) as running_total
21FROM orders;
22
23-- Ranking functions
24SELECT 
25    first_name,
26    salary,
27    ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num,
28    RANK() OVER (ORDER BY salary DESC) as rank,
29    DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank,
30    NTILE(4) OVER (ORDER BY salary DESC) as quartile
31FROM employees;
32
33-- LAG and LEAD
34SELECT 
35    order_date,
36    amount,
37    LAG(amount, 1) OVER (ORDER BY order_date) as previous_amount,
38    LEAD(amount, 1) OVER (ORDER BY order_date) as next_amount
39FROM orders;

Common Table Expressions (CTEs)

 1-- Basic CTE
 2WITH high_earners AS (
 3    SELECT * FROM employees WHERE salary > 100000
 4)
 5SELECT * FROM high_earners WHERE department_id = 3;
 6
 7-- Multiple CTEs
 8WITH 
 9dept_totals AS (
10    SELECT department_id, SUM(salary) as total_salary
11    FROM employees
12    GROUP BY department_id
13),
14dept_averages AS (
15    SELECT department_id, AVG(salary) as avg_salary
16    FROM employees
17    GROUP BY department_id
18)
19SELECT 
20    t.department_id,
21    t.total_salary,
22    a.avg_salary
23FROM dept_totals t
24JOIN dept_averages a ON t.department_id = a.department_id;
25
26-- Recursive CTE (for hierarchies)
27WITH RECURSIVE employee_hierarchy AS (
28    -- Anchor member
29    SELECT employee_id, first_name, manager_id, 1 as level
30    FROM employees
31    WHERE manager_id IS NULL
32    
33    UNION ALL
34    
35    -- Recursive member
36    SELECT e.employee_id, e.first_name, e.manager_id, eh.level + 1
37    FROM employees e
38    INNER JOIN employee_hierarchy eh ON e.manager_id = eh.employee_id
39)
40SELECT * FROM employee_hierarchy;

CASE Expressions

 1-- Simple CASE
 2SELECT 
 3    first_name,
 4    salary,
 5    CASE department_id
 6        WHEN 1 THEN 'Sales'
 7        WHEN 2 THEN 'Engineering'
 8        WHEN 3 THEN 'HR'
 9        ELSE 'Other'
10    END as department_name
11FROM employees;
12
13-- Searched CASE
14SELECT 
15    first_name,
16    salary,
17    CASE 
18        WHEN salary < 50000 THEN 'Low'
19        WHEN salary BETWEEN 50000 AND 100000 THEN 'Medium'
20        ELSE 'High'
21    END as salary_bracket
22FROM employees;

Views

 1-- Create view
 2CREATE VIEW high_salary_employees AS
 3SELECT first_name, last_name, salary, department_id
 4FROM employees
 5WHERE salary > 80000;
 6
 7-- Use view
 8SELECT * FROM high_salary_employees WHERE department_id = 3;
 9
10-- Updatable views (restrictions apply)
11CREATE VIEW engineering_staff AS
12SELECT employee_id, first_name, last_name, salary
13FROM employees
14WHERE department_id = 2;
15
16-- Update through view
17UPDATE engineering_staff SET salary = salary * 1.1;
18
19-- Drop view
20DROP VIEW high_salary_employees;
21
22-- Materialized views (PostgreSQL)
23CREATE MATERIALIZED VIEW dept_summary AS
24SELECT department_id, COUNT(*) as employee_count, AVG(salary) as avg_salary
25FROM employees
26GROUP BY department_id;
27
28-- Refresh materialized view
29REFRESH MATERIALIZED VIEW dept_summary;

Indexes

Improve query performance by creating data structures that allow faster lookups

 1-- Create index
 2CREATE INDEX idx_employee_lastname ON employees(last_name);
 3
 4-- Composite index
 5CREATE INDEX idx_dept_salary ON employees(department_id, salary);
 6
 7-- Unique index
 8CREATE UNIQUE INDEX idx_employee_email ON employees(email);
 9
10-- Drop index
11DROP INDEX idx_employee_lastname ON employees;
12
13-- Full-text index (MySQL)
14CREATE FULLTEXT INDEX idx_description ON products(description);
15
16-- Partial index (PostgreSQL)
17CREATE INDEX idx_active_employees ON employees(last_name) 
18WHERE active = true;

When to use indexes:

Columns frequently used in WHERE clauses
Foreign keys
Columns used in JOIN conditions
Columns used in ORDER BY

Trade-offs:

Indexes speed up reads but slow down writes
Indexes consume storage space
Too many indexes can hurt performance

Stored Procedures

 1-- MySQL/PostgreSQL syntax
 2DELIMITER //
 3CREATE PROCEDURE give_raise(IN emp_id INT, IN raise_pct DECIMAL(5,2))
 4BEGIN
 5    UPDATE employees 
 6    SET salary = salary * (1 + raise_pct / 100)
 7    WHERE employee_id = emp_id;
 8END //
 9DELIMITER ;
10
11-- Call procedure
12CALL give_raise(5, 10);
13
14-- Procedure with OUT parameter
15DELIMITER //
16CREATE PROCEDURE get_employee_count(OUT emp_count INT)
17BEGIN
18    SELECT COUNT(*) INTO emp_count FROM employees;
19END //
20DELIMITER ;
21
22-- Use OUT parameter
23CALL get_employee_count(@count);
24SELECT @count;

Triggers

Automatically execute code in response to events

 1-- Before insert trigger
 2CREATE TRIGGER before_employee_insert
 3BEFORE INSERT ON employees
 4FOR EACH ROW
 5BEGIN
 6    SET NEW.created_at = NOW();
 7    SET NEW.email = LOWER(NEW.email);
 8END;
 9
10-- After update trigger
11CREATE TRIGGER after_salary_update
12AFTER UPDATE ON employees
13FOR EACH ROW
14BEGIN
15    IF NEW.salary != OLD.salary THEN
16        INSERT INTO salary_history (employee_id, old_salary, new_salary, changed_at)
17        VALUES (NEW.employee_id, OLD.salary, NEW.salary, NOW());
18    END IF;
19END;
20
21-- Drop trigger
22DROP TRIGGER before_employee_insert;

Functions

User-defined functions return a single value

 1-- Scalar function
 2CREATE FUNCTION calculate_bonus(emp_salary DECIMAL(10,2)) 
 3RETURNS DECIMAL(10,2)
 4DETERMINISTIC
 5BEGIN
 6    RETURN emp_salary * 0.1;
 7END;
 8
 9-- Use function
10SELECT first_name, salary, calculate_bonus(salary) as bonus
11FROM employees;
12
13-- Table-valued function (SQL Server)
14CREATE FUNCTION get_department_employees(@dept_id INT)
15RETURNS TABLE
16AS
17RETURN (
18    SELECT employee_id, first_name, last_name, salary
19    FROM employees
20    WHERE department_id = @dept_id
21);
22
23-- Use table function
24SELECT * FROM get_department_employees(3);

Constraints

 1-- Add constraint to existing table
 2ALTER TABLE employees
 3ADD CONSTRAINT chk_salary CHECK (salary > 0);
 4
 5ALTER TABLE employees
 6ADD CONSTRAINT fk_department 
 7FOREIGN KEY (department_id) REFERENCES departments(department_id)
 8ON DELETE CASCADE
 9ON UPDATE CASCADE;
10
11-- Drop constraint
12ALTER TABLE employees
13DROP CONSTRAINT chk_salary;
14
15-- Check constraint
16CREATE TABLE orders (
17    order_id INT PRIMARY KEY,
18    order_date DATE,
19    ship_date DATE,
20    CONSTRAINT chk_dates CHECK (ship_date >= order_date)
21);

Foreign key actions:

CASCADE: Propagate changes/deletes to child rows
SET NULL: Set foreign key to NULL
SET DEFAULT: Set foreign key to default value
RESTRICT/NO ACTION: Prevent operation if child rows exist

Locking

 1-- Explicit locking (MySQL)
 2LOCK TABLES employees WRITE;
 3-- Perform operations
 4UNLOCK TABLES;
 5
 6-- Row-level locking
 7SELECT * FROM employees WHERE employee_id = 5 FOR UPDATE;
 8
 9-- Pessimistic locking
10BEGIN;
11SELECT * FROM accounts WHERE account_id = 1 FOR UPDATE;
12-- Perform operations
13COMMIT;
14
15-- Optimistic locking (application-level with version column)
16UPDATE accounts 
17SET balance = balance - 100, version = version + 1
18WHERE account_id = 1 AND version = @old_version;

Query Optimization

EXPLAIN/EXPLAIN ANALYZE: Shows query execution plan.

1EXPLAIN SELECT * FROM employees WHERE salary > 50000;
2
3EXPLAIN ANALYZE SELECT e.*, d.department_name
4FROM employees e
5JOIN departments d ON e.department_id = d.department_id;

Optimization techniques:

Use indexes on columns in WHERE, JOIN, ORDER BY
Avoid SELECT *, specify needed columns
Use LIMIT when appropriate
Avoid functions on indexed columns in WHERE (breaks index usage)
Use EXISTS instead of IN for subqueries when checking existence
Use JOIN instead of subqueries when possible
Avoid LIKE with leading wildcard (’%value')
Consider partitioning large tables
Update statistics regularly
Use appropriate data types

Partitioning

Splitting large tables into smaller, more manageable pieces

 1-- Range partitioning (MySQL)
 2CREATE TABLE sales (
 3    sale_id INT,
 4    sale_date DATE,
 5    amount DECIMAL(10,2)
 6)
 7PARTITION BY RANGE (YEAR(sale_date)) (
 8    PARTITION p2020 VALUES LESS THAN (2021),
 9    PARTITION p2021 VALUES LESS THAN (2022),
10    PARTITION p2022 VALUES LESS THAN (2023),
11    PARTITION p_future VALUES LESS THAN MAXVALUE
12);
13
14-- List partitioning
15CREATE TABLE employees_partitioned (
16    employee_id INT,
17    first_name VARCHAR(50),
18    region VARCHAR(10)
19)
20PARTITION BY LIST (region) (
21    PARTITION p_west VALUES IN ('CA', 'OR', 'WA'),
22    PARTITION p_east VALUES IN ('NY', 'MA', 'CT'),
23    PARTITION p_other VALUES IN (DEFAULT)
24);
25
26-- Hash partitioning
27CREATE TABLE orders_partitioned (
28    order_id INT,
29    customer_id INT,
30    order_date DATE
31)
32PARTITION BY HASH(customer_id)
33PARTITIONS 4;

#data-engineering #study-plan #career-development #zoomcamp