// Every clause, function & pattern — from SELECT to window functions — with real examples and interview Q&A
-- Select all columns SELECT * FROM patients; -- Select specific columns SELECT patient_id, first_name, last_name, age FROM patients; -- Column aliases with AS SELECT patient_id AS id, first_name || ' ' || last_name AS full_name, age AS patient_age FROM patients; -- DISTINCT — removes duplicates SELECT DISTINCT country FROM adverse_events; -- Expressions and calculations SELECT drug_name, unit_price * quantity AS total_cost, ROUND(dosage_mg / weight_kg, 2) AS dose_per_kg FROM prescriptions;
SELECT * — All columns (avoid in production)SELECT DISTINCT — Unique rows onlySELECT TOP n — First n rows (SQL Server)SELECT col AS alias — Rename column in resultSELECT 1+1 — Expressions without a tableFROM table — Simple tableFROM table alias — Table with aliasFROM (subquery) alias — Derived tableFROM schema.table — Fully qualifiedFROM table1, table2 — Cross join (implicit)-- Comparison operators SELECT * FROM adverse_events WHERE severity = 'Serious' AND report_date >= '2024-01-01'; -- IN — match multiple values SELECT * FROM cases WHERE country IN ('India', 'USA', 'UK'); -- BETWEEN — inclusive range SELECT * FROM patients WHERE age BETWEEN 18 AND 65; -- LIKE — pattern matching SELECT * FROM drugs WHERE drug_name LIKE 'Ator%'; -- starts with Ator WHERE drug_name LIKE '%statin'; -- ends with statin WHERE drug_name LIKE '%stat%'; -- contains stat WHERE drug_name LIKE 'A_a%'; -- _ = any single char -- IS NULL / IS NOT NULL SELECT * FROM cases WHERE resolution_date IS NULL; -- unresolved cases -- NOT operator SELECT * FROM adverse_events WHERE outcome NOT IN ('Recovered', 'Resolved'); -- EXISTS — check if subquery returns rows SELECT * FROM patients p WHERE EXISTS ( SELECT 1 FROM adverse_events ae WHERE ae.patient_id = p.patient_id AND ae.severity = 'Fatal' );
| Operator | Usage | Example |
|---|---|---|
= | Exact match | WHERE status = 'Active' |
!= / <> | Not equal | WHERE status != 'Closed' |
> < >= <= | Range comparison | WHERE age >= 18 |
BETWEEN a AND b | Inclusive range | WHERE age BETWEEN 18 AND 65 |
IN (...) | Value in list | WHERE country IN ('US','UK') |
NOT IN (...) | Value not in list | WHERE outcome NOT IN ('Fatal') |
LIKE 'pat%' | Pattern match | WHERE name LIKE 'A%' |
IS NULL | Null check | WHERE end_date IS NULL |
EXISTS (subq) | Row existence check | WHERE EXISTS (SELECT 1 ...) |
AND / OR / NOT | Logical operators | WHERE a=1 AND b=2 OR c=3 |
-- Count cases per country SELECT country, COUNT(*) AS total_cases, COUNT(DISTINCT patient_id) AS unique_patients FROM adverse_events GROUP BY country ORDER BY total_cases DESC; -- HAVING: only countries with 10+ serious cases SELECT country, COUNT(*) AS serious_cases FROM adverse_events WHERE severity = 'Serious' -- filter rows FIRST GROUP BY country HAVING COUNT(*) >= 10 -- filter groups AFTER ORDER BY serious_cases DESC; -- Multi-column GROUP BY SELECT drug_name, reaction_type, COUNT(*) AS case_count, AVG(patient_age) AS avg_age, MAX(report_date) AS latest_report FROM adverse_events GROUP BY drug_name, reaction_type HAVING COUNT(*) > 5 AND AVG(patient_age) > 40; -- ROLLUP — adds subtotals and grand total SELECT country, drug_name, COUNT(*) AS cnt FROM adverse_events GROUP BY ROLLUP(country, drug_name);
-- Basic sorting SELECT case_id, report_date, severity FROM adverse_events ORDER BY report_date DESC; -- newest first -- Multi-column sort ORDER BY country ASC, report_date DESC; -- Sort with NULL handling ORDER BY resolution_date DESC NULLS LAST; -- LIMIT / FETCH FIRST (top N rows) SELECT * FROM adverse_events ORDER BY report_date DESC LIMIT 10; -- MySQL / PostgreSQL -- Pagination with OFFSET SELECT * FROM cases ORDER BY case_id LIMIT 20 OFFSET 40; -- page 3 (rows 41-60) -- TOP n (SQL Server / MS Access) SELECT TOP 5 * FROM adverse_events ORDER BY severity_score DESC; -- FETCH FIRST (Oracle / DB2 / PostgreSQL 13+) SELECT * FROM cases ORDER BY report_date FETCH FIRST 10 ROWS ONLY;
-- INNER JOIN: cases with matching patient records SELECT c.case_id, c.drug_name, p.first_name, p.age, p.country FROM cases c INNER JOIN patients p ON c.patient_id = p.patient_id; -- LEFT JOIN: all cases, even without patient info SELECT c.case_id, p.first_name, -- NULL if no patient found p.age FROM cases c LEFT JOIN patients p ON c.patient_id = p.patient_id; -- Find cases with NO patient record (anti-join pattern) SELECT c.case_id FROM cases c LEFT JOIN patients p ON c.patient_id = p.patient_id WHERE p.patient_id IS NULL; -- no match = orphan case -- FULL OUTER JOIN SELECT COALESCE(a.drug_name, b.drug_name) AS drug, a.q1_cases, b.q2_cases FROM q1_report a FULL OUTER JOIN q2_report b ON a.drug_name = b.drug_name; -- Multi-table JOIN SELECT c.case_id, p.first_name, d.drug_name, d.atc_code, ae.reaction_term FROM cases c JOIN patients p ON c.patient_id = p.patient_id JOIN drugs d ON c.drug_id = d.drug_id JOIN reactions ae ON c.case_id = ae.case_id; -- SELF JOIN: find patients in the same country SELECT a.patient_id AS patient_1, b.patient_id AS patient_2, a.country FROM patients a JOIN patients b ON a.country = b.country AND a.patient_id < b.patient_id; -- avoid duplicates
SELECT COUNT(*) AS total_rows, COUNT(patient_id) AS non_null_patients, COUNT(DISTINCT drug_name) AS unique_drugs, SUM(case_count) AS total_cases, AVG(patient_age) AS avg_age, MIN(report_date) AS earliest_report, MAX(report_date) AS latest_report, ROUND(AVG(patient_age), 1) AS avg_age_rounded FROM adverse_events; -- COUNT(*) vs COUNT(col): COUNT(*) counts all rows, -- COUNT(col) ignores NULLs in that column SELECT COUNT(*) AS all_cases, COUNT(follow_up_date) AS cases_with_followup FROM cases; -- STRING_AGG / GROUP_CONCAT — aggregate strings SELECT patient_id, STRING_AGG(drug_name, ', ') AS all_drugs_taken FROM prescriptions GROUP BY patient_id;
| Function | NULLs | Result Type | Common Use |
|---|---|---|---|
COUNT(*) | Counts NULLs | Integer | Total rows in group |
COUNT(col) | Ignores NULLs | Integer | Non-null values |
COUNT(DISTINCT col) | Ignores NULLs | Integer | Unique values |
SUM(col) | Ignores NULLs | Numeric | Total of a numeric column |
AVG(col) | Ignores NULLs | Decimal | Mean value |
MIN(col) | Ignores NULLs | Same as col | Smallest/earliest value |
MAX(col) | Ignores NULLs | Same as col | Largest/latest value |
-- 1. Scalar subquery in SELECT (returns 1 value) SELECT country, COUNT(*) AS cases, ( SELECT COUNT(*) FROM adverse_events ) AS global_total FROM adverse_events GROUP BY country; -- 2. Subquery in WHERE (filter using derived value) SELECT * FROM adverse_events WHERE patient_age > ( SELECT AVG(patient_age) FROM adverse_events -- patients older than average ); -- 3. IN with subquery SELECT drug_name FROM drugs WHERE drug_id IN ( SELECT DISTINCT drug_id FROM adverse_events WHERE severity = 'Fatal' -- drugs with fatal events ); -- 4. Derived table in FROM SELECT country, avg_cases FROM ( SELECT country, AVG(case_count) AS avg_cases FROM monthly_summary GROUP BY country ) AS country_avg WHERE avg_cases > 50; -- 5. Correlated subquery (references outer query) SELECT p.patient_id, p.first_name, ( SELECT COUNT(*) FROM adverse_events ae WHERE ae.patient_id = p.patient_id ) AS total_events FROM patients p;
WITH clause) does the same thing as a derived table subquery but is:-- Basic CTE WITH serious_cases AS ( SELECT * FROM adverse_events WHERE severity = 'Serious' AND report_date >= '2024-01-01' ) SELECT country, COUNT(*) AS count FROM serious_cases GROUP BY country; -- Multiple CTEs (chain them with commas) WITH case_counts AS ( SELECT drug_id, COUNT(*) AS total_cases FROM adverse_events GROUP BY drug_id ), serious_counts AS ( SELECT drug_id, COUNT(*) AS serious_cases FROM adverse_events WHERE severity = 'Serious' GROUP BY drug_id ) SELECT d.drug_name, cc.total_cases, sc.serious_cases, ROUND(sc.serious_cases * 100.0 / cc.total_cases, 2) AS pct_serious FROM drugs d JOIN case_counts cc ON d.drug_id = cc.drug_id JOIN serious_counts sc ON d.drug_id = sc.drug_id ORDER BY pct_serious DESC; -- Recursive CTE (org hierarchy / tree traversal) WITH RECURSIVE org_tree AS ( -- Anchor: top-level (no manager) SELECT emp_id, name, manager_id, 1 AS level FROM employees WHERE manager_id IS NULL UNION ALL -- Recursive: join children to parent SELECT e.emp_id, e.name, e.manager_id, ot.level + 1 FROM employees e JOIN org_tree ot ON e.manager_id = ot.emp_id ) SELECT * FROM org_tree ORDER BY level, name;
Unlike GROUP BY which collapses rows, window functions keep all rows and add a computed column based on a "window" of related rows. They never reduce the row count.
-- ROW_NUMBER: unique sequential number per partition SELECT case_id, country, report_date, ROW_NUMBER() OVER ( PARTITION BY country ORDER BY report_date ) AS row_num FROM adverse_events; -- RANK vs DENSE_RANK (ties handling) SELECT drug_name, case_count, RANK() OVER (ORDER BY case_count DESC) AS rank, DENSE_RANK() OVER (ORDER BY case_count DESC) AS dense_rank FROM drug_summary; -- RANK skips numbers after ties (1,2,2,4) -- DENSE_RANK does not skip (1,2,2,3) -- Get the most recent case per country (top 1 per group) SELECT * FROM ( SELECT *, ROW_NUMBER() OVER ( PARTITION BY country ORDER BY report_date DESC ) AS rn FROM adverse_events ) ranked WHERE rn = 1; -- Running total (cumulative SUM) SELECT report_date, daily_cases, SUM(daily_cases) OVER ( ORDER BY report_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS cumulative_cases FROM daily_summary; -- Moving average (last 7 days) SELECT report_date, daily_cases, AVG(daily_cases) OVER ( ORDER BY report_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW ) AS rolling_7day_avg FROM daily_summary; -- LAG/LEAD: compare to previous/next row SELECT report_date, daily_cases, LAG(daily_cases, 1) OVER (ORDER BY report_date) AS prev_day, LEAD(daily_cases, 1) OVER (ORDER BY report_date) AS next_day, daily_cases - LAG(daily_cases, 1) OVER (ORDER BY report_date) AS day_change FROM daily_summary; -- NTILE: divide into quartiles SELECT patient_id, age, NTILE(4) OVER (ORDER BY age) AS age_quartile FROM patients; -- PERCENT_RANK and CUME_DIST SELECT drug_name, case_count, PERCENT_RANK() OVER (ORDER BY case_count) AS pct_rank, CUME_DIST() OVER (ORDER BY case_count) AS cume_dist FROM drug_summary;
SELECT UPPER(drug_name) AS upper_name, LOWER(drug_name) AS lower_name, LENGTH(drug_name) AS name_length, TRIM(drug_name) AS trimmed, LTRIM(drug_name) AS left_trimmed, RTRIM(drug_name) AS right_trimmed, REPLACE(drug_name, ' ', '_') AS underscored, SUBSTRING(drug_name, 1, 5) AS first_5_chars, LEFT(drug_name, 3) AS first_3, RIGHT(drug_name, 3) AS last_3, CONCAT(first_name, ' ', last_name) AS full_name, first_name || ' ' || last_name AS full_name_alt, CHARINDEX('statin', drug_name) AS statin_pos, LPAD(case_id::TEXT, 8, '0') AS padded_id FROM drugs;
SELECT CURRENT_DATE AS today, NOW() AS now_timestamp, EXTRACT(YEAR FROM report_date) AS report_year, EXTRACT(MONTH FROM report_date) AS report_month, DATE_TRUNC('month', report_date) AS month_start, DATE_TRUNC('year', report_date) AS year_start, AGE(CURRENT_DATE, report_date) AS days_since_report, CURRENT_DATE - report_date AS days_elapsed, report_date + INTERVAL '15 days' AS due_date, TO_CHAR(report_date, 'DD-Mon-YYYY') AS formatted_date FROM adverse_events; -- Cases submitted within 15-day window (PV use case!) SELECT case_id, receipt_date, submission_date, (submission_date - receipt_date) AS days_to_submit, CASE WHEN (submission_date - receipt_date) <= 15 THEN 'On Time' ELSE 'Late' END AS compliance_status FROM icsr_submissions;
-- Searched CASE (most common) SELECT case_id, patient_age, CASE WHEN patient_age < 18 THEN 'Paediatric' WHEN patient_age < 65 THEN 'Adult' WHEN patient_age >= 65 THEN 'Elderly' ELSE 'Unknown' END AS age_group FROM adverse_events; -- Simple CASE (equality check only) SELECT CASE severity WHEN 'Fatal' THEN 4 WHEN 'Life-threatening' THEN 3 WHEN 'Serious' THEN 2 WHEN 'Non-Serious' THEN 1 ELSE 0 END AS severity_score FROM adverse_events; -- CASE in aggregate (conditional counting) SELECT drug_name, COUNT(*) AS total_cases, SUM(CASE WHEN severity = 'Fatal' THEN 1 ELSE 0 END) AS fatal_count, SUM(CASE WHEN severity = 'Serious' THEN 1 ELSE 0 END) AS serious_count, SUM(CASE WHEN outcome = 'Recovered' THEN 1 ELSE 0 END) AS recovered FROM adverse_events GROUP BY drug_name; -- COALESCE — return first non-null value SELECT case_id, COALESCE(reporter_email, reporter_phone, 'No contact') AS contact FROM cases; -- NULLIF — return NULL if two values are equal SELECT NULLIF(denominator, 0) -- avoids divide-by-zero FROM calculations;
-- Create index CREATE INDEX idx_cases_country ON adverse_events (country); -- Composite index (column order matters!) CREATE INDEX idx_cases_country_date ON adverse_events (country, report_date); -- Unique index (enforces uniqueness) CREATE UNIQUE INDEX idx_case_unique ON cases (worldwide_case_id); -- EXPLAIN: see query execution plan EXPLAIN ANALYZE SELECT * FROM adverse_events WHERE country = 'India' AND report_date >= '2024-01-01'; -- Look for: Seq Scan (bad) vs Index Scan (good)
| Tip | Explanation |
|---|---|
| Use WHERE on indexed columns | Queries filter faster when WHERE clause columns are indexed |
| Avoid functions on indexed cols | WHERE UPPER(name) = 'X' defeats the index — use functional index instead |
| SELECT only needed columns | Avoid SELECT * — fetching fewer columns reduces I/O |
| Composite index left-prefix rule | Index on (A,B,C) — queries on A or A+B or A+B+C use it; B alone does not |
| Use LIMIT for large results | Always paginate large result sets |
| JOIN on indexed FK columns | Foreign key columns used in JOINs should always be indexed |
-- CREATE TABLE CREATE TABLE adverse_events ( case_id SERIAL PRIMARY KEY, patient_id INT NOT NULL, drug_name VARCHAR(200) NOT NULL, severity VARCHAR(50) DEFAULT 'Non-Serious', report_date DATE NOT NULL, country VARCHAR(100), created_at TIMESTAMP DEFAULT NOW(), FOREIGN KEY (patient_id) REFERENCES patients(patient_id) ); -- ALTER TABLE ALTER TABLE adverse_events ADD COLUMN outcome VARCHAR(100); ALTER TABLE adverse_events RENAME COLUMN drug_name TO suspect_drug; -- DROP TABLE (destructive!) DROP TABLE IF EXISTS temp_cases;
-- INSERT single row INSERT INTO adverse_events (patient_id, drug_name, severity, report_date) VALUES (1001, 'Atorvastatin', 'Serious', '2024-06-15'); -- INSERT from SELECT INSERT INTO archive_cases SELECT * FROM adverse_events WHERE report_date < '2020-01-01'; -- UPDATE UPDATE adverse_events SET outcome = 'Recovered', updated_at = NOW() WHERE case_id = 5042; -- DELETE specific rows DELETE FROM adverse_events WHERE report_date < '2015-01-01' AND severity = 'Non-Serious'; -- TRUNCATE (removes ALL rows, faster than DELETE) TRUNCATE TABLE temp_staging; -- UPSERT: INSERT or UPDATE if exists (PostgreSQL) INSERT INTO cases (case_id, status) VALUES (9999, 'Open') ON CONFLICT (case_id) DO UPDATE SET status = EXCLUDED.status;
WHERE age > 18 filters patients before counting. HAVING COUNT(*) > 5 filters groups that have more than 5 cases.-- Find duplicates by case_id SELECT worldwide_case_id, COUNT(*) AS occurrences FROM icsr_cases GROUP BY worldwide_case_id HAVING COUNT(*) > 1; -- Show all columns of duplicate rows SELECT * FROM icsr_cases WHERE worldwide_case_id IN ( SELECT worldwide_case_id FROM icsr_cases GROUP BY worldwide_case_id HAVING COUNT(*) > 1 );
-- Method 1: Using DENSE_RANK (best approach) SELECT drug_name, case_count FROM ( SELECT drug_name, case_count, DENSE_RANK() OVER (ORDER BY case_count DESC) AS dr FROM drug_summary ) ranked WHERE dr = 3; -- change 3 to any N -- Method 2: Using LIMIT OFFSET SELECT DISTINCT case_count FROM drug_summary ORDER BY case_count DESC LIMIT 1 OFFSET 2; -- OFFSET N-1
SELECT a.patient_id AS patient_1, b.patient_id AS patient_2, a.country FROM patients a JOIN patients b ON a.country = b.country AND a.patient_id < b.patient_id;
SELECT drug_name, COUNT(*) AS serious_cases, COUNT(DISTINCT country) AS countries_affected, MIN(report_date) AS first_report, MAX(report_date) AS latest_report, RANK() OVER (ORDER BY COUNT(*) DESC) AS rank_by_cases FROM adverse_events WHERE severity = 'Serious' AND EXTRACT(YEAR FROM report_date) = 2024 GROUP BY drug_name HAVING COUNT(*) > 10 ORDER BY serious_cases DESC;
WITH compliance_check AS ( SELECT case_id, receipt_date, submission_date, country, severity, (submission_date - receipt_date) AS days_to_submit, CASE WHEN submission_date IS NULL THEN 'Not Yet Submitted' WHEN (submission_date - receipt_date) <= 15 THEN 'Compliant' ELSE 'Late' END AS status FROM icsr_submissions WHERE severity = 'Serious' ) SELECT status, COUNT(*) AS case_count, ROUND(AVG(days_to_submit), 1) AS avg_days, MAX(days_to_submit) AS worst_case_days FROM compliance_check GROUP BY status ORDER BY case_count DESC;