Python Snippets¶
Copy-paste building blocks.
Connect to a Postgres database¶
import os
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine(os.environ['DATABASE_URL'])
df = pd.read_sql('SELECT * FROM users WHERE created_at > :since',
engine, params={'since': '2026-01-01'})
Read multiple CSVs into one DataFrame¶
from pathlib import Path
import pandas as pd
dfs = [pd.read_csv(f).assign(source=f.name) for f in Path('data').glob('*.csv')]
df = pd.concat(dfs, ignore_index=True)
Cohort retention¶
df['signup_month'] = df['signup_date'].dt.to_period('M')
df['active_month'] = df['active_date'].dt.to_period('M')
df['cohort_index'] = (df['active_month'] - df['signup_month']).apply(lambda x: x.n)
cohort = df.groupby(['signup_month', 'cohort_index'])['user_id'].nunique().unstack()
retention = cohort.divide(cohort[0], axis=0)
Funnel conversion¶
funnel = (df.groupby('step')['user_id']
.nunique()
.reindex(['visit','signup','purchase']))
funnel_pct = funnel / funnel.iloc[0]
A/B test (two-proportion z-test)¶
from statsmodels.stats.proportion import proportions_ztest
success = [120, 150] # conversions
n = [1000, 1000] # visitors
z, p = proportions_ztest(success, n)
print(f'z={z:.2f}, p={p:.4f}')
Linear regression with statsmodels¶
import statsmodels.api as sm
X = sm.add_constant(df[['ad_spend', 'season']])
model = sm.OLS(df['revenue'], X).fit()
print(model.summary())
Save plot to file¶
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(8, 5))
df['revenue'].plot(ax=ax, title='Revenue')
fig.savefig('revenue.png', dpi=150, bbox_inches='tight')
Convert wide → long¶
long = df.melt(id_vars=['user_id'], value_vars=['mon','tue','wed'],
var_name='day', value_name='hours')
Weighted average¶
def weighted_avg(group):
return (group['value'] * group['weight']).sum() / group['weight'].sum()
df.groupby('category').apply(weighted_avg)
Bin numeric to category¶
df['age_group'] = pd.cut(df['age'], bins=[0, 17, 34, 54, 100],
labels=['<18','18-34','35-54','55+'])
df['rev_quartile'] = pd.qcut(df['revenue'], q=4, labels=['Q1','Q2','Q3','Q4'])
Anti-join (rows in A not in B)¶
m = a.merge(b, on='id', how='left', indicator=True)
only_in_a = m[m['_merge'] == 'left_only'].drop(columns='_merge')
Date features¶
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['quarter'] = df['date'].dt.quarter
df['weekday'] = df['date'].dt.day_name()
df['is_weekend'] = df['date'].dt.weekday >= 5
df['days_since'] = (pd.Timestamp.today() - df['date']).dt.days