Creating Tables

Data Types

  • char(n)
  • varchar(n)
  • int
  • text
  • date
  • interval
  • numeric(a, b)

Constraints

CREATE TABLE resource(
  rsrc_id INT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
  rsrc_name TEXT NOT NULL,
  rsrc_link TEXT UNIQUE,
  rsrc_type VARCHAR(6) NOT NULL CHECK(rsrc_type IN(
    'book',
    'video',
    'notes'
  ))
);
CREATE TABLE courses_students(
  evaluation NUMERIC( 3, 0) CHECK(evaluation >= 0 AND evaluation <= 100),
  student_id INT REFERENCES student,
  course_id INT REFERENCES course,
  PRIMARY KEY(
    student_id,
    course_id
  )
);

Inserting Data

INSERT INTO table_name (columns, ...) VALUES ...,list,of,values,...;
INSERT INTO course (course_name, textbook_id, university_id) VALUES
('Information Security', 3, 4),
('Natural Language Processing', 3, 2),
('Computer Vision', 3, 1),
('Advances in Operating System', 3, 2);
INSERT INTO instructor (instructor_name) VALUES
('Andrew Ng'),
('Pralay Mitra'),
('Anirudh Verma'),
('Soen');

Joins

Join two or more tables according to rules of JAIN JOIN type

T1 { [INNER] | { LEFT | RIGHT | FULL } [OUTER] } JOIN T2 ON boolean_expression;
T1 { [INNER] | { LEFT | RIGHT | FULL } [OUTER] } JOIN T2 USING ( join column list );
T1 NATURAL { [INNER] | { LEFT | RIGHT | FULL } [OUTER] } JOIN T2;

Inner

For each row of the joined table has a row for each row in that satsifies the join condition with

Outer

  • LEFT OUTER: joined table always has at least one row for each row in , even if null values are added to rows
  • RIGHT OUTER: joined table always has at least one row for each row in , even if null* values are added for rows
  • FULL OUTER: both

USING

shorthand for ON join condition, expands to

SELECT DISTINCT cn.name, s.name FROM country cn
  INNER JOIN student s USING(iso);

expands to

SELECT DISTINCT cn.name, s.name FROM country cn
  INNER JOIN student s ON cn.iso = s.iso;

used for joining tables with matching column names (you provide the exact column name)

NATURAL

shorthand for USING(), forms a USING list consisting of all column names that appear in both input tables

SELECT course_name, student_name, evaluation FROM courses_students
  NATURAL JOIN course      -- course_id common column
  NATURAL JOIN student;    -- student_id common column

Queries

Grouping

  • GROUP BY: group rows having some same column value
    • can apply aggregates
  • HAVING: basically WHERE but for groups
    • ungrouped columns must be in aggregate functions
-- course with most students
SELECT c.course_name FROM course c
  NATURAL JOIN courses_students
  NATURAL JOIN student s
  GROUP BY 
    c.course_name
  ORDER BY count(s.student_id) DESC
  LIMIT 1;
-- student with highest average marks
SELECT s.student_name, avg(cs.evaluation) FROM student s
  NATURAL JOIN courses_students cs
  NATURAL JOIN tags
  NATURAL JOIN topic t
  WHERE t.topic_name = 'AI'
    AND s.country = 'IN'
  GROUP BY s.student_id
  ORDER BY avg(cs.evaluation) DESC
  LIMIT 1;

Aggregation

Aggregate the data of a column across all rows. The built-in aggregate functions are

  • avg()
  • count()
  • min()
  • max()
  • sum()

plus a few more like bitwise operations etc

ORDER BY

Query results are usually not sorted

SELECT select_list
    FROM table_expression
    ORDER BY sort_expression1 [ASC | DESC] [NULLS { FIRST | LAST }]
             [, sort_expression2 [ASC | DESC] [NULLS { FIRST | LAST }] ...]

To sort them

Subqueries

These exist

SELECT DISTINCT c.course_name FROM course c
  -- set of non-genai is empty
  WHERE NOT EXISTS (
    -- each student
    SELECT cs.course_id FROM courses_students cs
    WHERE cs.course_id = c.course_id
      -- is not in genai
      AND cs.student_id NOT IN (
      SELECT s.student_id FROM student s
        NATURAL JOIN courses_students
        NATURAL JOIN course
        WHERE course.course_name = 'GenAI'
    )
  );

Relation Algebra for all Queries

  • select operator
  • project operator
  • union
  • set difference
  • cartesian product
  • rename
  • join

Q1. All certificate courses on AI with duration 6 months

-- q1
SELECT DISTINCT c.course_name FROM course c
  NATURAL JOIN courses_programs
  NATURAL JOIN program p
  NATURAL JOIN tags
  NATURAL JOIN topic t
  WHERE p.program_type = 'certificate'
    AND t.topic_name = 'AI'
    AND p.duration <= interval '6 months';

Q2. All certificate courses on AI 6 months by IITKGP

-- q2
SELECT DISTINCT c.course_name FROM course c
  NATURAL JOIN courses_programs
  NATURAL JOIN program p
  NATURAL JOIN tags
  NATURAL JOIN topic t
  NATURAL JOIN university u
  WHERE p.program_type = 'certificate'
    AND t.topic_name = 'AI'
    AND p.duration <= interval '6 months'
    AND u.university_name = 'IITKGP';

Q3. Students (age 18 or age 60 ) who took GenAI

-- q3
SELECT DISTINCT s.student_name FROM student s
  NATURAL JOIN courses_students
  NATURAL JOIN course c
  WHERE c.course_name = 'GenAI'
    AND (
      EXTRACT(year FROM age(current_date,s.student_dob))::int < 18 
      OR 
      EXTRACT(year FROM age(current_date,s.student_dob))::int > 60
    );

Q4. Non-Indian Students in AI from IITKGP

-- q6
SELECT DISTINCT s.student_name FROM student s
  NATURAL JOIN courses_students
  NATURAL JOIN course c
  NATURAL JOIN tags
  NATURAL JOIN topic t
  INNER JOIN university u USING(university_id)
  WHERE s.country <> 'IN'
    AND t.topic_name = 'AI'
    AND u.university_name = 'IITKGP';

Q5. Countries of students taught by Andrew Ng

-- q5
SELECT DISTINCT cn.name FROM country cn
  INNER JOIN student s ON cn.iso = s.country
  NATURAL JOIN courses_students
  NATURAL JOIN courses_teachers
  NATURAL JOIN instructor i
  WHERE i.instructor_name = 'Andrew Ng';

Q6. Instructors with at least one Indian student

-- q6
SELECT DISTINCT i.instructor_name FROM instructor i
  NATURAL JOIN courses_teachers
  NATURAL JOIN courses_students
  NATURAL JOIN student s
  WHERE s.country = 'IN';

Q7. Courses taken by students who also took GenAI

-- q7
SELECT DISTINCT c.course_name FROM course c
  NATURAL JOIN courses_students
  NATURAL JOIN student s
  WHERE s.student_id IN (
    SELECT DISTINCT s.student_id FROM student s
      NATURAL JOIN courses_students
      NATURAL JOIN course c
      WHERE c.course_name = 'GenAI'
  );

Q8. Courses where all students took ‘GenAI’

-- q8
SELECT DISTINCT c.course_name FROM course c
  WHERE NOT EXISTS (
    SELECT cs.course_id FROM courses_students cs
    WHERE cs.course_id = c.course_id
      AND cs.student_id NOT IN (
      SELECT s.student_id FROM student s
        NATURAL JOIN courses_students
        NATURAL JOIN course
        WHERE course.course_name = 'GenAI'
    )
  );
-- q9
SELECT c.course_name FROM course c
  NATURAL JOIN courses_students
  NATURAL JOIN student s
  GROUP BY 
    c.course_name
  ORDER BY count(s.student_id) DESC
  LIMIT 1;

Grouping Operator

Assuming a grouping operator that groups based on a column with equal values and only accepts aggregate functions for other non-grouped columns it basically converts multiple rows with a common column value into a single row with the other columns aggregated

Q10. Indian student with highest marks in AI

-- q10
SELECT s.student_name, avg(cs.evaluation) FROM student s
  NATURAL JOIN courses_students cs
  NATURAL JOIN tags
  NATURAL JOIN topic t
  WHERE t.topic_name = 'AI'
    AND s.country = 'IN'
  GROUP BY s.student_id
  ORDER BY avg(cs.evaluation) DESC
  LIMIT 1;