← Back to Interview Hub

Complete Reference Guide

SQL
End-to-End

// Every clause, function & pattern — from SELECT to window functions — with real examples and interview Q&A

SELECT → GROUP BY → JOINS

Subqueries & CTEs

Window Functions

Interview Scenarios

SELECT & FROM

// The foundation of every query — retrieving data

SQL Execution Order — Always Remember This

FROM

→

JOIN

→

WHERE

→

GROUP BY

→

HAVING

→

SELECT

→

ORDER BY

→

LIMIT

SELECT is processed 6th — that's why you can't use SELECT aliases in WHERE or GROUP BY (in most DBs)

Basic SELECT Syntax

-- Select all columns
SELECT * FROM patients;

-- Select specific columns
SELECT patient_id, first_name, last_name, age
FROM patients;

-- Column aliases with AS
SELECT
    patient_id               AS id,
    first_name || ' ' || last_name  AS full_name,
    age                      AS patient_age
FROM patients;

-- DISTINCT — removes duplicates
SELECT DISTINCT country
FROM adverse_events;

-- Expressions and calculations
SELECT
    drug_name,
    unit_price * quantity   AS total_cost,
    ROUND(dosage_mg / weight_kg, 2)  AS dose_per_kg
FROM prescriptions;

SELECT variations

SELECT * — All columns (avoid in production)
SELECT DISTINCT — Unique rows only
SELECT TOP n — First n rows (SQL Server)
SELECT col AS alias — Rename column in result
SELECT 1+1 — Expressions without a table

FROM variations

FROM table — Simple table
FROM table alias — Table with alias
FROM (subquery) alias — Derived table
FROM schema.table — Fully qualified
FROM table1, table2 — Cross join (implicit)

WHERE Clause

// Filter rows before aggregation

WHERE — All Operators

-- Comparison operators
SELECT * FROM adverse_events
WHERE severity = 'Serious'
  AND report_date >= '2024-01-01';

-- IN — match multiple values
SELECT * FROM cases
WHERE country IN ('India', 'USA', 'UK');

-- BETWEEN — inclusive range
SELECT * FROM patients
WHERE age BETWEEN 18 AND 65;

-- LIKE — pattern matching
SELECT * FROM drugs
WHERE drug_name LIKE 'Ator%';       -- starts with Ator
WHERE drug_name LIKE '%statin';     -- ends with statin
WHERE drug_name LIKE '%stat%';     -- contains stat
WHERE drug_name LIKE 'A_a%';       -- _ = any single char

-- IS NULL / IS NOT NULL
SELECT * FROM cases
WHERE resolution_date IS NULL;     -- unresolved cases

-- NOT operator
SELECT * FROM adverse_events
WHERE outcome NOT IN ('Recovered', 'Resolved');

-- EXISTS — check if subquery returns rows
SELECT * FROM patients p
WHERE EXISTS (
    SELECT 1 FROM adverse_events ae
    WHERE ae.patient_id = p.patient_id
      AND ae.severity = 'Fatal'
);

Operator	Usage	Example
`=`	Exact match	`WHERE status = 'Active'`
`!= / <>`	Not equal	`WHERE status != 'Closed'`
`> < >= <=`	Range comparison	`WHERE age >= 18`
`BETWEEN a AND b`	Inclusive range	`WHERE age BETWEEN 18 AND 65`
`IN (...)`	Value in list	`WHERE country IN ('US','UK')`
`NOT IN (...)`	Value not in list	`WHERE outcome NOT IN ('Fatal')`
`LIKE 'pat%'`	Pattern match	`WHERE name LIKE 'A%'`
`IS NULL`	Null check	`WHERE end_date IS NULL`
`EXISTS (subq)`	Row existence check	`WHERE EXISTS (SELECT 1 ...)`
`AND / OR / NOT`	Logical operators	`WHERE a=1 AND b=2 OR c=3`

GROUP BY & HAVING

// Aggregation and filtering aggregated results

WHERE vs HAVING — The Key Difference

WHERE filters rows before grouping — acts on individual rows.
HAVING filters groups after aggregation — acts on aggregate results.
You cannot use aggregate functions in WHERE — use HAVING instead.

GROUP BY & HAVING Examples

-- Count cases per country
SELECT
    country,
    COUNT(*)        AS total_cases,
    COUNT(DISTINCT patient_id)  AS unique_patients
FROM adverse_events
GROUP BY country
ORDER BY total_cases DESC;

-- HAVING: only countries with 10+ serious cases
SELECT
    country,
    COUNT(*) AS serious_cases
FROM adverse_events
WHERE severity = 'Serious'          -- filter rows FIRST
GROUP BY country
HAVING COUNT(*) >= 10               -- filter groups AFTER
ORDER BY serious_cases DESC;

-- Multi-column GROUP BY
SELECT
    drug_name,
    reaction_type,
    COUNT(*)            AS case_count,
    AVG(patient_age)    AS avg_age,
    MAX(report_date)    AS latest_report
FROM adverse_events
GROUP BY drug_name, reaction_type
HAVING COUNT(*) > 5
   AND AVG(patient_age) > 40;

-- ROLLUP — adds subtotals and grand total
SELECT country, drug_name, COUNT(*) AS cnt
FROM adverse_events
GROUP BY ROLLUP(country, drug_name);

ORDER BY & LIMIT

// Sorting and paginating results

ORDER BY & LIMIT / OFFSET

-- Basic sorting
SELECT case_id, report_date, severity
FROM adverse_events
ORDER BY report_date DESC;         -- newest first

-- Multi-column sort
ORDER BY country ASC, report_date DESC;

-- Sort with NULL handling
ORDER BY resolution_date DESC NULLS LAST;

-- LIMIT / FETCH FIRST (top N rows)
SELECT * FROM adverse_events
ORDER BY report_date DESC
LIMIT 10;                            -- MySQL / PostgreSQL

-- Pagination with OFFSET
SELECT * FROM cases
ORDER BY case_id
LIMIT 20 OFFSET 40;                 -- page 3 (rows 41-60)

-- TOP n (SQL Server / MS Access)
SELECT TOP 5 * FROM adverse_events
ORDER BY severity_score DESC;

-- FETCH FIRST (Oracle / DB2 / PostgreSQL 13+)
SELECT * FROM cases
ORDER BY report_date
FETCH FIRST 10 ROWS ONLY;

JOINs — All Types

// Combining data from multiple tables

INNER JOIN

Returns rows where there is a match in both tables. Non-matching rows from either table are excluded.

LEFT JOIN (LEFT OUTER)

Returns all rows from the left table + matching rows from right. Non-matches on right = NULL.

RIGHT JOIN (RIGHT OUTER)

Returns all rows from the right table + matching rows from left. Non-matches on left = NULL.

FULL OUTER JOIN

Returns all rows from both tables. NULLs where no match exists on either side.

CROSS JOIN

Returns the Cartesian product — every row of table A combined with every row of table B. No ON condition.

SELF JOIN

Joins a table to itself. Used for hierarchical or comparative queries within one table.

All JOIN Types — Examples

-- INNER JOIN: cases with matching patient records
SELECT
    c.case_id,
    c.drug_name,
    p.first_name,
    p.age,
    p.country
FROM cases c
INNER JOIN patients p
    ON c.patient_id = p.patient_id;

-- LEFT JOIN: all cases, even without patient info
SELECT
    c.case_id,
    p.first_name,   -- NULL if no patient found
    p.age
FROM cases c
LEFT JOIN patients p
    ON c.patient_id = p.patient_id;

-- Find cases with NO patient record (anti-join pattern)
SELECT c.case_id
FROM cases c
LEFT JOIN patients p ON c.patient_id = p.patient_id
WHERE p.patient_id IS NULL;         -- no match = orphan case

-- FULL OUTER JOIN
SELECT
    COALESCE(a.drug_name, b.drug_name) AS drug,
    a.q1_cases,
    b.q2_cases
FROM q1_report a
FULL OUTER JOIN q2_report b
    ON a.drug_name = b.drug_name;

-- Multi-table JOIN
SELECT
    c.case_id,
    p.first_name,
    d.drug_name,
    d.atc_code,
    ae.reaction_term
FROM cases c
JOIN patients p  ON c.patient_id = p.patient_id
JOIN drugs d     ON c.drug_id    = d.drug_id
JOIN reactions ae ON c.case_id   = ae.case_id;

-- SELF JOIN: find patients in the same country
SELECT
    a.patient_id  AS patient_1,
    b.patient_id  AS patient_2,
    a.country
FROM patients a
JOIN patients b
    ON a.country = b.country
   AND a.patient_id < b.patient_id; -- avoid duplicates

Aggregate Functions

// COUNT, SUM, AVG, MIN, MAX and more

Aggregate Functions

SELECT
    COUNT(*)                   AS total_rows,
    COUNT(patient_id)          AS non_null_patients,
    COUNT(DISTINCT drug_name)  AS unique_drugs,
    SUM(case_count)            AS total_cases,
    AVG(patient_age)           AS avg_age,
    MIN(report_date)           AS earliest_report,
    MAX(report_date)           AS latest_report,
    ROUND(AVG(patient_age), 1) AS avg_age_rounded
FROM adverse_events;

-- COUNT(*) vs COUNT(col): COUNT(*) counts all rows,
-- COUNT(col) ignores NULLs in that column
SELECT
    COUNT(*)               AS all_cases,
    COUNT(follow_up_date)  AS cases_with_followup
FROM cases;

-- STRING_AGG / GROUP_CONCAT — aggregate strings
SELECT
    patient_id,
    STRING_AGG(drug_name, ', ') AS all_drugs_taken
FROM prescriptions
GROUP BY patient_id;

Function	NULLs	Result Type	Common Use
`COUNT(*)`	Counts NULLs	Integer	Total rows in group
`COUNT(col)`	Ignores NULLs	Integer	Non-null values
`COUNT(DISTINCT col)`	Ignores NULLs	Integer	Unique values
`SUM(col)`	Ignores NULLs	Numeric	Total of a numeric column
`AVG(col)`	Ignores NULLs	Decimal	Mean value
`MIN(col)`	Ignores NULLs	Same as col	Smallest/earliest value
`MAX(col)`	Ignores NULLs	Same as col	Largest/latest value

Subqueries

// Nested queries inside SELECT, FROM, WHERE

Subquery Types

-- 1. Scalar subquery in SELECT (returns 1 value)
SELECT
    country,
    COUNT(*) AS cases,
    (
        SELECT COUNT(*) FROM adverse_events
    ) AS global_total
FROM adverse_events
GROUP BY country;

-- 2. Subquery in WHERE (filter using derived value)
SELECT * FROM adverse_events
WHERE patient_age > (
    SELECT AVG(patient_age)
    FROM adverse_events          -- patients older than average
);

-- 3. IN with subquery
SELECT drug_name FROM drugs
WHERE drug_id IN (
    SELECT DISTINCT drug_id
    FROM adverse_events
    WHERE severity = 'Fatal'    -- drugs with fatal events
);

-- 4. Derived table in FROM
SELECT country, avg_cases
FROM (
    SELECT
        country,
        AVG(case_count) AS avg_cases
    FROM monthly_summary
    GROUP BY country
) AS country_avg
WHERE avg_cases > 50;

-- 5. Correlated subquery (references outer query)
SELECT
    p.patient_id,
    p.first_name,
    (
        SELECT COUNT(*)
        FROM adverse_events ae
        WHERE ae.patient_id = p.patient_id
    ) AS total_events
FROM patients p;

CTEs — WITH Clause

// Common Table Expressions — clean, readable, reusable

CTE vs Subquery

A CTE (WITH clause) does the same thing as a derived table subquery but is:
→ More readable — named and defined before the main query
→ Reusable — can reference the same CTE multiple times
→ Recursive — supports recursive queries (hierarchies, trees)
→ Debuggable — easier to isolate and test each step

CTE Examples

-- Basic CTE
WITH serious_cases AS (
    SELECT *
    FROM adverse_events
    WHERE severity = 'Serious'
      AND report_date >= '2024-01-01'
)
SELECT country, COUNT(*) AS count
FROM serious_cases
GROUP BY country;

-- Multiple CTEs (chain them with commas)
WITH
case_counts AS (
    SELECT drug_id, COUNT(*) AS total_cases
    FROM adverse_events
    GROUP BY drug_id
),
serious_counts AS (
    SELECT drug_id, COUNT(*) AS serious_cases
    FROM adverse_events
    WHERE severity = 'Serious'
    GROUP BY drug_id
)
SELECT
    d.drug_name,
    cc.total_cases,
    sc.serious_cases,
    ROUND(sc.serious_cases * 100.0 / cc.total_cases, 2) AS pct_serious
FROM drugs d
JOIN case_counts cc  ON d.drug_id = cc.drug_id
JOIN serious_counts sc ON d.drug_id = sc.drug_id
ORDER BY pct_serious DESC;

-- Recursive CTE (org hierarchy / tree traversal)
WITH RECURSIVE org_tree AS (
    -- Anchor: top-level (no manager)
    SELECT emp_id, name, manager_id, 1 AS level
    FROM employees
    WHERE manager_id IS NULL

    UNION ALL

    -- Recursive: join children to parent
    SELECT e.emp_id, e.name, e.manager_id, ot.level + 1
    FROM employees e
    JOIN org_tree ot ON e.manager_id = ot.emp_id
)
SELECT * FROM org_tree ORDER BY level, name;

Window Functions

// OVER(), PARTITION BY, ROW_NUMBER, RANK, LAG, LEAD

What makes window functions different?

Unlike GROUP BY which collapses rows, window functions keep all rows and add a computed column based on a "window" of related rows. They never reduce the row count.

Window Functions — Full Reference

-- ROW_NUMBER: unique sequential number per partition
SELECT
    case_id, country, report_date,
    ROW_NUMBER() OVER (
        PARTITION BY country
        ORDER BY report_date
    ) AS row_num
FROM adverse_events;

-- RANK vs DENSE_RANK (ties handling)
SELECT
    drug_name, case_count,
    RANK()       OVER (ORDER BY case_count DESC) AS rank,
    DENSE_RANK() OVER (ORDER BY case_count DESC) AS dense_rank
FROM drug_summary;
-- RANK skips numbers after ties (1,2,2,4)
-- DENSE_RANK does not skip (1,2,2,3)

-- Get the most recent case per country (top 1 per group)
SELECT * FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY country
            ORDER BY report_date DESC
        ) AS rn
    FROM adverse_events
) ranked
WHERE rn = 1;

-- Running total (cumulative SUM)
SELECT
    report_date,
    daily_cases,
    SUM(daily_cases) OVER (
        ORDER BY report_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_cases
FROM daily_summary;

-- Moving average (last 7 days)
SELECT
    report_date,
    daily_cases,
    AVG(daily_cases) OVER (
        ORDER BY report_date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS rolling_7day_avg
FROM daily_summary;

-- LAG/LEAD: compare to previous/next row
SELECT
    report_date,
    daily_cases,
    LAG(daily_cases, 1) OVER (ORDER BY report_date) AS prev_day,
    LEAD(daily_cases, 1) OVER (ORDER BY report_date) AS next_day,
    daily_cases - LAG(daily_cases, 1) OVER (ORDER BY report_date) AS day_change
FROM daily_summary;

-- NTILE: divide into quartiles
SELECT
    patient_id, age,
    NTILE(4) OVER (ORDER BY age) AS age_quartile
FROM patients;

-- PERCENT_RANK and CUME_DIST
SELECT
    drug_name,
    case_count,
    PERCENT_RANK() OVER (ORDER BY case_count) AS pct_rank,
    CUME_DIST()    OVER (ORDER BY case_count) AS cume_dist
FROM drug_summary;

String Functions

// UPPER, LOWER, TRIM, CONCAT, SUBSTRING, REPLACE…

String Functions

SELECT
    UPPER(drug_name)                         AS upper_name,
    LOWER(drug_name)                         AS lower_name,
    LENGTH(drug_name)                        AS name_length,
    TRIM(drug_name)                          AS trimmed,
    LTRIM(drug_name)                         AS left_trimmed,
    RTRIM(drug_name)                         AS right_trimmed,
    REPLACE(drug_name, ' ', '_')            AS underscored,
    SUBSTRING(drug_name, 1, 5)              AS first_5_chars,
    LEFT(drug_name, 3)                       AS first_3,
    RIGHT(drug_name, 3)                      AS last_3,
    CONCAT(first_name, ' ', last_name)      AS full_name,
    first_name || ' ' || last_name          AS full_name_alt,
    CHARINDEX('statin', drug_name)          AS statin_pos,
    LPAD(case_id::TEXT, 8, '0')             AS padded_id
FROM drugs;

Date Functions

// DATEPART, DATEDIFF, DATE_TRUNC, EXTRACT…

Date Functions (PostgreSQL / ANSI SQL)

SELECT
    CURRENT_DATE                                      AS today,
    NOW()                                              AS now_timestamp,
    EXTRACT(YEAR FROM report_date)                  AS report_year,
    EXTRACT(MONTH FROM report_date)                 AS report_month,
    DATE_TRUNC('month', report_date)                AS month_start,
    DATE_TRUNC('year', report_date)                 AS year_start,
    AGE(CURRENT_DATE, report_date)                  AS days_since_report,
    CURRENT_DATE - report_date                       AS days_elapsed,
    report_date + INTERVAL '15 days'               AS due_date,
    TO_CHAR(report_date, 'DD-Mon-YYYY')             AS formatted_date
FROM adverse_events;

-- Cases submitted within 15-day window (PV use case!)
SELECT
    case_id,
    receipt_date,
    submission_date,
    (submission_date - receipt_date) AS days_to_submit,
    CASE
        WHEN (submission_date - receipt_date) <= 15
            THEN 'On Time'
        ELSE 'Late'
    END AS compliance_status
FROM icsr_submissions;

CASE WHEN

// Conditional logic inside SQL queries

CASE WHEN — All Patterns

-- Searched CASE (most common)
SELECT
    case_id,
    patient_age,
    CASE
        WHEN patient_age < 18  THEN 'Paediatric'
        WHEN patient_age < 65  THEN 'Adult'
        WHEN patient_age >= 65 THEN 'Elderly'
        ELSE 'Unknown'
    END AS age_group
FROM adverse_events;

-- Simple CASE (equality check only)
SELECT
    CASE severity
        WHEN 'Fatal'         THEN 4
        WHEN 'Life-threatening' THEN 3
        WHEN 'Serious'        THEN 2
        WHEN 'Non-Serious'    THEN 1
        ELSE 0
    END AS severity_score
FROM adverse_events;

-- CASE in aggregate (conditional counting)
SELECT
    drug_name,
    COUNT(*)  AS total_cases,
    SUM(CASE WHEN severity = 'Fatal'    THEN 1 ELSE 0 END) AS fatal_count,
    SUM(CASE WHEN severity = 'Serious'  THEN 1 ELSE 0 END) AS serious_count,
    SUM(CASE WHEN outcome = 'Recovered' THEN 1 ELSE 0 END) AS recovered
FROM adverse_events
GROUP BY drug_name;

-- COALESCE — return first non-null value
SELECT
    case_id,
    COALESCE(reporter_email, reporter_phone, 'No contact') AS contact
FROM cases;

-- NULLIF — return NULL if two values are equal
SELECT NULLIF(denominator, 0)  -- avoids divide-by-zero
FROM calculations;

Indexes & Performance

// Make queries fast — what interviewers love to ask

What is an Index?

An index is a data structure (B-tree by default) that allows the database to find rows quickly without scanning the entire table. Like a book index — points to where the data is instead of reading every page.

Index Trade-offs

Faster: SELECT, WHERE, JOIN, ORDER BY
Slower: INSERT, UPDATE, DELETE (index must be maintained)
Storage: Indexes use disk space

Indexes & EXPLAIN

-- Create index
CREATE INDEX idx_cases_country
    ON adverse_events (country);

-- Composite index (column order matters!)
CREATE INDEX idx_cases_country_date
    ON adverse_events (country, report_date);

-- Unique index (enforces uniqueness)
CREATE UNIQUE INDEX idx_case_unique
    ON cases (worldwide_case_id);

-- EXPLAIN: see query execution plan
EXPLAIN ANALYZE
SELECT * FROM adverse_events
WHERE country = 'India'
  AND report_date >= '2024-01-01';
-- Look for: Seq Scan (bad) vs Index Scan (good)

Tip	Explanation
Use WHERE on indexed columns	Queries filter faster when WHERE clause columns are indexed
Avoid functions on indexed cols	`WHERE UPPER(name) = 'X'` defeats the index — use functional index instead
SELECT only needed columns	Avoid `SELECT *` — fetching fewer columns reduces I/O
Composite index left-prefix rule	Index on (A,B,C) — queries on A or A+B or A+B+C use it; B alone does not
Use LIMIT for large results	Always paginate large result sets
JOIN on indexed FK columns	Foreign key columns used in JOINs should always be indexed

DDL & DML

// CREATE, ALTER, DROP, INSERT, UPDATE, DELETE, TRUNCATE

DDL — Data Definition Language

-- CREATE TABLE
CREATE TABLE adverse_events (
    case_id        SERIAL PRIMARY KEY,
    patient_id     INT NOT NULL,
    drug_name      VARCHAR(200) NOT NULL,
    severity       VARCHAR(50)  DEFAULT 'Non-Serious',
    report_date    DATE NOT NULL,
    country        VARCHAR(100),
    created_at     TIMESTAMP DEFAULT NOW(),
    FOREIGN KEY (patient_id) REFERENCES patients(patient_id)
);

-- ALTER TABLE
ALTER TABLE adverse_events
    ADD COLUMN outcome VARCHAR(100);

ALTER TABLE adverse_events
    RENAME COLUMN drug_name TO suspect_drug;

-- DROP TABLE (destructive!)
DROP TABLE IF EXISTS temp_cases;

DML — Data Manipulation Language

-- INSERT single row
INSERT INTO adverse_events (patient_id, drug_name, severity, report_date)
VALUES (1001, 'Atorvastatin', 'Serious', '2024-06-15');

-- INSERT from SELECT
INSERT INTO archive_cases
SELECT * FROM adverse_events
WHERE report_date < '2020-01-01';

-- UPDATE
UPDATE adverse_events
SET    outcome = 'Recovered',
       updated_at = NOW()
WHERE  case_id = 5042;

-- DELETE specific rows
DELETE FROM adverse_events
WHERE report_date < '2015-01-01'
  AND severity = 'Non-Serious';

-- TRUNCATE (removes ALL rows, faster than DELETE)
TRUNCATE TABLE temp_staging;

-- UPSERT: INSERT or UPDATE if exists (PostgreSQL)
INSERT INTO cases (case_id, status)
VALUES (9999, 'Open')
ON CONFLICT (case_id)
DO UPDATE SET status = EXCLUDED.status;

Interview Q&A

// Real SQL interview questions with answers

// Fundamentals

What is the difference between WHERE and HAVING?+

WHERE filters individual rows before any grouping occurs. It cannot reference aggregate functions.

HAVING filters groups after GROUP BY has aggregated the data. It can reference aggregate functions like COUNT(), SUM(), AVG().

Example: WHERE age > 18 filters patients before counting. HAVING COUNT(*) > 5 filters groups that have more than 5 cases.

Rule: If you want to filter aggregated results → use HAVING. Everything else → use WHERE.

What is the difference between RANK(), DENSE_RANK(), and ROW_NUMBER()?+

Given values: 100, 100, 90, 80:

ROW_NUMBER(): 1, 2, 3, 4 — unique, no ties, arbitrary tiebreak
RANK(): 1, 1, 3, 4 — ties get same rank, next rank skips (gap after tie)
DENSE_RANK(): 1, 1, 2, 3 — ties get same rank, next rank does NOT skip (no gap)

Use ROW_NUMBER when you need exactly 1 row per group (top-N queries). Use RANK/DENSE_RANK when ties matter.

Write a query to find duplicate records in a table.+

-- Find duplicates by case_id
SELECT
    worldwide_case_id,
    COUNT(*) AS occurrences
FROM icsr_cases
GROUP BY worldwide_case_id
HAVING COUNT(*) > 1;

-- Show all columns of duplicate rows
SELECT * FROM icsr_cases
WHERE worldwide_case_id IN (
    SELECT worldwide_case_id
    FROM icsr_cases
    GROUP BY worldwide_case_id
    HAVING COUNT(*) > 1
);

Write a query to find the Nth highest salary (or Nth highest case count).+

-- Method 1: Using DENSE_RANK (best approach)
SELECT drug_name, case_count
FROM (
    SELECT
        drug_name,
        case_count,
        DENSE_RANK() OVER (ORDER BY case_count DESC) AS dr
    FROM drug_summary
) ranked
WHERE dr = 3;  -- change 3 to any N

-- Method 2: Using LIMIT OFFSET
SELECT DISTINCT case_count
FROM drug_summary
ORDER BY case_count DESC
LIMIT 1 OFFSET 2;  -- OFFSET N-1

What is the difference between DELETE, TRUNCATE and DROP?+

DELETE: Removes specific rows based on WHERE clause. Logged row-by-row (slower). Can be rolled back. Table structure remains. Triggers fire.

TRUNCATE: Removes ALL rows instantly. Minimal logging (faster). Cannot have WHERE clause. Usually cannot be rolled back (DB-dependent). Resets identity/auto-increment. Triggers do NOT fire.

DROP: Removes the entire table — structure, data, indexes, constraints all gone. Irreversible (without backup).

Memory tip: DELETE = selective removal, TRUNCATE = empty the table, DROP = destroy the table.

What is a self join? Give a real example.+

A self join joins a table to itself using two different aliases. Used when rows in a table relate to other rows in the same table.

Example — find patients from the same country:

SELECT
    a.patient_id  AS patient_1,
    b.patient_id  AS patient_2,
    a.country
FROM patients a
JOIN patients b
    ON a.country = b.country
   AND a.patient_id < b.patient_id;

PV specific: Write a query to find all drugs that have more than 10 serious cases submitted in 2024, ranked by case count.+

SELECT
    drug_name,
    COUNT(*)                                   AS serious_cases,
    COUNT(DISTINCT country)                    AS countries_affected,
    MIN(report_date)                            AS first_report,
    MAX(report_date)                            AS latest_report,
    RANK() OVER (ORDER BY COUNT(*) DESC)      AS rank_by_cases
FROM adverse_events
WHERE
    severity    = 'Serious'
    AND EXTRACT(YEAR FROM report_date) = 2024
GROUP BY
    drug_name
HAVING
    COUNT(*) > 10
ORDER BY
    serious_cases DESC;

Write a query to check ICSR submission compliance — cases submitted within 15 days vs late.+

WITH compliance_check AS (
    SELECT
        case_id,
        receipt_date,
        submission_date,
        country,
        severity,
        (submission_date - receipt_date) AS days_to_submit,
        CASE
            WHEN submission_date IS NULL
                THEN 'Not Yet Submitted'
            WHEN (submission_date - receipt_date) <= 15
                THEN 'Compliant'
            ELSE 'Late'
        END AS status
    FROM icsr_submissions
    WHERE severity = 'Serious'
)
SELECT
    status,
    COUNT(*)                        AS case_count,
    ROUND(AVG(days_to_submit), 1)  AS avg_days,
    MAX(days_to_submit)             AS worst_case_days
FROM compliance_check
GROUP BY status
ORDER BY case_count DESC;

    SQL END-TO-END GUIDE // ALL CLAUSES, FUNCTIONS & PATTERNS

    ← Back to PV Interview Prep