SQL - Previous month sales using Lag, GroupBy
- Published on
- • 2 mins read•––– views
Find the total quantity previous year for a given dataset
season, geo, product, qty
2020SU, asia, shoes, 50
2020SU, asia, dresses, 80
2020SU, asia, dresses, 40
2020FA, asia, socks, 120
2021FA, asia, shoes, 90
2021SU, asia, dresses, 180
2021SU, asia, dresses, 10
2022SU, asia, dresses, 280
2022FA, asia, socks, 210
2020SU, europe, dresses, 60
2021SU, europe, dresses, 100
2022SU, europe, dresses, 280
2022FA, europe, socks, 210
2021SU, global, dresses, NULL
2022SU, global, dresses, NULL
Output:
season, geo, product, qty, qty_previous_year
2020SU, asia, dresses, 120, NULL
2021SU, asia, dresses, 190, 120
2022SU, asia, dresses, 280, 190
2020SU, asia, shoes, 50, NULL
import psycopg2from psycopg2.extras import execute_valuesimport pandasimport warnings
warnings.filterwarnings('ignore')
# Connect to the postgresSQL serverconn = psycopg2.connect( host="localhost", port=5432, database="postgres", user="postgres", password="N0P4ssw0rd!@#$", options="-c search_path=temp")cur = conn.cursor()
table_name = "orders"drop_table = f"""drop table if exists {table_name};"""create_table = f""" create table if not exists {table_name} ( season varchar, geo varchar, product varchar, qty integer ); """insert_table = f"""insert into {table_name} values %s"""select_table = f"""select * from {table_name}"""cur.execute(drop_table)conn.commit()cur.execute(create_table)conn.commit()
data = """2020SU, asia, shoes, 502020SU, asia, dresses, 802020SU, asia, dresses, 402020FA, asia, socks, 1202021FA, asia, shoes, 902021SU, asia, dresses, 1802021SU, asia, dresses, 102022SU, asia, dresses, 2802022FA, asia, socks, 2102020SU, europe, dresses, 602021SU, europe, dresses, 1002022SU, europe, dresses, 2802022FA, europe, socks, 2102021SU, global, dresses, NULL2022SU, global, dresses, NULL""".replace("NULL", "0")
data = [tuple([_.strip() for _ in x.split(",")]) for x in data.split("\n") if x]# print(data)
execute_values(cur, insert_table, data)conn.commit()
def run_query(_query, option=1): if option == 0: cur.execute(_query) rows = cur.fetchall() for row in rows: print(row) else: df = pandas.read_sql_query(_query, con=conn) print(df.to_string(index=False))
result_query = f"""select *, lag(qty) over (partition by geo, product order by season) as previous_year_qtyfrom (select season, geo, product, sum(qty::int) as qtyfrom {table_name}group by season, geo, productorder by season, geo, product, qty asc) t"""
run_query(result_query, option=1)