- “Window” refers to a set of rows that surround a considered row, either preceding or following that row.
- Window function basics
[What is impact of Partition by in Window function?](Effective-SQL:Item 37: Know How to Use Window Functions)
- PARTITION BY predicate specifies how the window should be divided.
- If partition by is omitted, your database system applies the function over the entire result set
- Partition BY results are sensitive to the order in which the rows are returned.
- Query may have different predicates for each OVER clause
[What is impact of if Partition by is missing in Window function?](Effective-SQL:Item 37: Know How to Use Window Functions)
- If there is no partition-clause or oder-by given... aggregate function work like group-by
- select account_id, amount, SUM(amount) OVER() AS total_sum from account;
- Window functions are “aware” of the surrounding rows.
- Which makes it easier to create running or moving aggregations than with the traditional aggregation functions
- It also helps at statement-level grouping.
- Yes!
- PARTITION BY s.CustomerID, s.PurchaseMonth ORDER BY s.PurchaseYear -- is valid statement
- Many of the aggregate functions SUM(), COUNT(), AVG(), and others, can be used as window functions.
- ROW_NUMBER() and RANK()
- Whenever you need to change the window frame’s bounding to a non-default setting, you must specify an ORDER BY predicate even when it is optional.
- If you need to define an arbitrary size for a window frame, you must use ROWS, which allows you to input how many rows preceding or following are to be included in the window frame.
- RANGE can accept only UNBOUNDED PRECEDING, CURRENT ROW, or UNBOUNDED FOLLOWING as valid options.
You can choose between RANGE for logical grouping of rows and ROWS for physical offset of the rows. If the ORDER BY predicate does not return duplicate values, the results are equivalent
select
round(100 * sum(customer_pref_delivery_date = order_date) / sum(1), 2)
as immediate_percentage
from Delivery;
SELECT
ROUND(100*AVG(order_date = customer_pref_delivery_date), 2) AS immediate_percentage
FROM Delivery;
select * from department d where not exists (
select 1 from employee e where e.depart_name = d.depart_name
)
select * from (
select store_name, sum(price) as total_sales from sales group by store_name
) sales
join
(select avg(total_sales) as sales from (
select store_name, sum(price) as total_sales from sales group by store_name) x
) avg_sales
on sales.total_sales > avg_sales.sales;
---
with sales as ( select store_name, sum(price) as total_sales from sales group by store_name )
select * from sales
join (select avg(total_sales) as sales from sales x) avg_sales
on sales.total_sales > avg_sales.sales
---
select store_name from sales group by stores having sum(sales) > (select avg(price) from sales)
SELECT
q1.person_name
FROM
Queue q1 JOIN Queue q2 ON q1.turn >= q2.turn
GROUP BY q1.turn
HAVING SUM(q2.weight) <= 1000
ORDER
BY SUM(q2.weight) DESC
LIMIT 1
with running_sum as (
select person_id, person_name, turn, sum(weight) over (order by turn) as s_weight from Queue
)
select person_name from running_sum
where turn = (select max(turn) from running_sum where s_weight <= 1000 )
log_id - 1,2,3,,6,8 %
with r as (
select log_id, ROW_NUMBER() over (order by log_id) as rown from logs
)
select min(log_id) as start_id, max(log_id) as end_id from r group by (log_id - rown )
select L1.log_id as START_ID, L2.log_id as END_ID
from
(select log_id from Logs
where log_id-1 not in (select log_id from Logs)) L1,
(select log_id from Logs
where log_id+1 not in (select log_id from Logs)) L2
where L1.log_id <= L2.log_id
Order by start_id
START_ID | END_ID |
---|---|
1 | 3 |
1 | 8 |
1 | 10 |
7 | 8 |
7 | 10 |
10 | 10 |
select L1.log_id as START_ID, min(L2.log_id) as END_ID
from
(select log_id from Logs
where log_id-1 not in (select log_id from Logs)) L1,
(select log_id from Logs
where log_id+1 not in (select log_id from Logs)) L2
where L1.log_id <= L2.log_id
group by L1.log_id
select *,
(case when salary > (select avg(salary) from employee) then 'Higher than average'
else null end) as remarks
from
employee
--
select *,
(case when salary > avg_salary.sal) then 'Higher than average'
else null end) as remarks
from
employee
cross join (select avg(salary) sal from employee) avg_salary;
select store_name, sum(quantity) from sales group by store_name having sum(quantity) > (select avg(quantiy) from sales);
insert into employee_history
select e.emp_id, e.emp_name, d.dept_name, e.salary, d.location from employee e
join department d on d.dept_name = e.dept_name
where not exists (select 1 from employee_history eh where eh.emp_id = e.emp_id)
How to Give 10% increment to all employees in Bangalore location based on the maximum salary earned by an emp in each dept. Only consider employees in employee_history table
update employee e
set salary = (select max(salary) * 1.1 from employee_history eh where eh.dept_name = e.dept_name)
where e.dept_name in (select dept_name from department where location='Bangalore')
and e.emp_id in (select emp_id from employee_history)
with matches as (select row_number() over (order by team_name) as id, team_code, team_name from teams t)
select
team.name as team, opponent.name as oppnent
from matches team join matches opponent on team.id < opponent.id
delete from department d
where d.deparement id not in (select 1 from employee e where e.dept_name = d.dept_name)
--
delete from department d where depatment_name in (
select dept_name from department d where not exists (select 1 from employee e where e.dept_name = d.dept_name)
)
SELECT Recipes.RecipeID, Recipes.RecipeTitle FROM
Recipes INNER JOIN Recipe_Ingredients ON Recipes.RecipeID = Recipe_Ingredients.RecipeID
INNER JOIN Ingredients ON Ingredients.IngredientID = Recipe_Ingredients.IngredientID
WHERE Ingredients.IngredientName = 'Beef'
SELECT BeefRecipes.RecipeTitle
FROM
(SELECT Recipes.RecipeID, Recipes.RecipeTitle
FROM (Recipes INNER JOIN Recipe_Ingredients
ON Recipes.RecipeID = Recipe_Ingredients.RecipeID)
INNER JOIN Ingredients
ON Ingredients.IngredientID =
Recipe_Ingredients.IngredientID
WHERE Ingredients.IngredientName = 'Beef')
AS BeefRecipes
INNER JOIN
(SELECT Recipe_Ingredients.RecipeID
FROM Recipe_Ingredients INNER JOIN Ingredients
ON Ingredients.IngredientID =
Recipe_Ingredients.IngredientID
WHERE Ingredients.IngredientName = 'Garlic')
AS GarlicRecipes
ON BeefRecipes.RecipeID = GarlicRecipes.RecipeID;
select employee_id, count(team_id) over (partition by team_id) as team_size from employee
order by employee_id
with team_size as (select count(1) as team_size, team_id from Employee group by team_id)
select employee_id, team_size.team_size from Employee e inner join team_size on team_size.team_id = e.team_id
-- Ensure you've run SalesOrdersStructure.sql
-- and SalesOrdersData.sql in the Sample Databases folder
-- in order to run this example.
USE SalesOrdersSample;
GO
SELECT Customers.CustomerID, Customers.CustFirstName,
Customers.CustLastName, Orders.OrderNumber, Orders.OrderDate
FROM Customers
INNER JOIN Orders
ON Customers.CustomerID = Orders.CustomerID
WHERE EXISTS
(SELECT NULL
FROM (Orders AS O2
INNER JOIN Order_Details
ON O2.OrderNumber = Order_Details.OrderNumber)
INNER JOIN Products
ON Products.ProductNumber = Order_Details.ProductNumber
WHERE Products.ProductName = 'Skateboard'
AND O2.OrderNumber = Orders.OrderNumber)
AND EXISTS
(SELECT NULL
FROM (Orders AS O3
INNER JOIN Order_Details
ON O3.OrderNumber = Order_Details.OrderNumber)
INNER JOIN Products
ON Products.ProductNumber = Order_Details.ProductNumber
WHERE Products.ProductName = 'Helmet'
AND O3.OrderNumber = Orders.OrderNumber);
- LAG() function provides access to a row that comes before the current row at a specified physical offset. In other words, from the current row the LAG() function can access data of the previous row, or the row before the previous row, and so on.
WITH cte AS (
SELECT year, SUM(amount) amount
FROM sales GROUP BY year ORDER BY year
)
SELECT
year,
amount,
LAG(amount,1) OVER (ORDER BY year) previous_year_sales
FROM
cte;
SUM(t.Amount) OVER (PARTITION BY t.AccountID ORDER BY t.TransactionID DESC) - t.Amount AS TotalUnspent,
RANK() OVER ( PARTITION BY o.CustomerID ORDER BY o.OrderTotal DESC ) AS CustomerOrderRanking
select sale_date, sum(case when fruit='apples' then sold_num else -sold_num end) as diff
from sales group by sale_date
select a.sale_date, a.sold_num - b.sold_num as diff from Sales a inner join Sales b on a.sale_date = b.sale_date
and a.fruit='apples' and b.fruit = 'oranges'
order by a.sale_date
Find TotalByCustomer - running sum for each customer, TotalOverall - and entire order Expected: "CustomerID, OrderNumber, OrderTotal, TotalByCustomer and TotalOverall"
SELECT o.OrderNumber, o.CustomerID, o.OrderTotal,
SUM(o.OrderTotal) OVER (PARTITION BY o.CustomerID ORDER BY o.OrderNumber, o.CustomerID) AS TotalByCustomer,
SUM(o.OrderTotal) OVER (ORDER BY o.OrderNumber) AS TotalOverall
FROM Orders AS o
ORDER BY o.OrderNumber, o.CustomerID;
Find total-unspent using different predicate for each over clause, Transaction: Amount, AccountId, TransactionId
SELECT t.AccountID, t.Amount,
SUM(t.Amount) OVER (PARTITION BY t.AccountID ORDER BY t.TransactionID DESC) - t.Amount AS TotalUnspent,
SUM(t.Amount) OVER (ORDER BY t.TransactionID) AS TotalOverall
FROM Transactions AS t
ORDER BY t.TransactionID
SET search_path = SalesOrdersSample;
SELECT
ROW_NUMBER() OVER ( ORDER BY o.OrderDate) AS OrderSequence,
ROW_NUMBER() OVER ( PARTITION BY o.CustomerID ORDER BY o.OrderDate ) AS CustomerOrderSequence,
o.OrderNumber, o.CustomerID, o.OrderDate, o.OrderTotal,
RANK() OVER ( ORDER BY o.OrderTotal DESC ) AS OrderRanking,
RANK() OVER ( PARTITION BY o.CustomerID ORDER BY o.OrderTotal DESC ) AS CustomerOrderRanking
FROM Orders AS o
ORDER BY o.OrderDate;
##[Demonstration of moving average window functions](5.34 -Effective SQL)
SELECT s.CustomerID, s.PurchaseYear, s.PurchaseMonth,
LAG(s.PurchaseTotal, 1) OVER (PARTITION BY s.CustomerID, s.PurchaseMonth ORDER BY s.PurchaseYear ) AS PreviousMonthTotal,
s.PurchaseTotal AS CurrentMonthTotal,
LEAD(s.PurchaseTotal, 1) OVER (PARTITION BY s.CustomerID, s.PurchaseMonth ORDER BY s.PurchaseYear ) AS NextMonthTotal,
AVG(s.PurchaseTotal) OVER ( PARTITION BY s.CustomerID, s.PurchaseMonth ORDER BY s.PurchaseYear ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING ) AS MonthOfYearAverage
FROM PurchaseStatistics AS s
ORDER BY s.CustomerID, s.PurchaseYear, s.PurchaseMonth;
SELECT
s.CustomerID, s.PurchaseYear, s.PurchaseMonth,
SUM(s.PurchaseCount) OVER ( PARTITION BY s.PurchaseYear
ORDER BY s.CustomerID
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS CountByRange,
SUM(s.PurchaseCount) OVER (
PARTITION BY s.PurchaseYear
ORDER BY s.CustomerID
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS CountByRows
FROM PurchaseStatistics AS s
ORDER BY s.CustomerID, s.PurchaseYear, s.PurchaseMonth;
- Effective-SQL/PostgreSQL/Chapter 05/Listing 5.031.sql
- Crack SQL Interview Question: Subquery vs. CTE
mdanki Advanced_Sql.md Advanced_Sql.apkg --deck "Mohan::Pack::Advanced_Sql"