In [1]:

```
# Set up packages for lecture. Don't worry about understanding this code,
# but make sure to run it if you're following along.
import numpy as np
import babypandas as bpd
import pandas as pd
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
set_matplotlib_formats("svg")
plt.style.use('ggplot')

np.set_printoptions(threshold=20, precision=2, suppress=True)
pd.set_option("display.max_rows", 7)
pd.set_option("display.max_columns", 8)
pd.set_option("display.precision", 2)

# Animations
import time
from IPython.display import display, HTML, IFrame, clear_output
import ipywidgets as widgets

import warnings
warnings.filterwarnings('ignore')

def normal_curve(x, mu=0, sigma=1):
    return 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2 * sigma**2))

def normal_area(a, b, bars=False):
    x = np.linspace(-4, 4, 1000)
    y = normal_curve(x)
    ix = (x >= a) & (x <= b)
    plt.figure(figsize=(10, 5))
    plt.plot(x, y, color='black')
    plt.fill_between(x[ix], y[ix], color='gold')
    if bars:
        plt.axvline(a, color='red')
        plt.axvline(b, color='red')
    plt.title(f'Area between {np.round(a, 2)} and {np.round(b, 2)}')
    plt.show()

def show_clt_slides():
    src = "https://docs.google.com/presentation/d/e/2PACX-1vTcJd3U1H1KoXqBFcWGKFUPjZbeW4oiNZZLCFY8jqvSDsl4L1rRTg7980nPs1TGCAecYKUZxH5MZIBh/embed?start=false&loop=false&delayms=3000&rm=minimal"
    width = 960
    height = 509
    display(IFrame(src, width, height))
```

- Lab 6 is due on **Saturday 5/27 at 11:59PM**.
- Homework 6 – the final homework of the quarter – is due on **Tuesday 5/30 at 11:59PM**.
- The Final Project is due on **Tuesday 6/6 at 11:59PM**.
- The Grade Report has been updated – take a look on Gradescope.

- The normal distribution.
- The Central Limit Theorem.

SAT scores range from 0 to 1600. The distribution of SAT scores has a mean of 950 and a standard deviation of 300. Your friend tells you that their SAT score, in standard units, is 2.5. What do you conclude?
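A quick arithmetic sketch of how to check the claim: standard units convert back to the original scale via $\text{value} = \text{mean} + z \cdot \text{SD}$.

```python
# Convert the friend's claimed 2.5 standard units back to the SAT scale
# using value = mean + z * SD.
sat_mean = 950
sat_sd = 300
claimed_score = sat_mean + 2.5 * sat_sd
claimed_score
```

Since the resulting score of 1700 exceeds the maximum possible score of 1600, the friend's claim can't be right.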

- The standard normal distribution can be thought of as a "continuous histogram."

In [2]:

```
normal_area(-1, 1)
```

- Like a histogram:
    - The **area** between $a$ and $b$ is the **proportion** of values between $a$ and $b$.
    - The total area underneath the normal curve is 1.

- **Key idea: The $x$-axis in a plot of the _standard_ normal distribution is in _standard_ units.**
    - For instance, the area between -1 and 1 is the proportion of values within 1 standard deviation of the mean.

- The standard normal distribution's **cumulative distribution function** (CDF) describes the proportion of values in the distribution less than or equal to $z$, for all values of $z$.
- In Python, we use the function `scipy.stats.norm.cdf`.
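As a quick sketch of what `scipy.stats.norm.cdf` returns (standard scipy usage, not code from the lecture):

```python
from scipy import stats

# Proportion of values less than or equal to z = 0. The standard normal
# curve is symmetric about 0, so exactly half the area lies to the left.
half = stats.norm.cdf(0)

# The area between two points comes from subtracting two CDF values.
within_1_sd = stats.norm.cdf(1) - stats.norm.cdf(-1)
half, within_1_sd
```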

Last time, we looked at a data set of heights and weights of 5000 adult males.

In [3]:

```
height_and_weight = bpd.read_csv('data/height_and_weight.csv')
height_and_weight
```

Out[3]:

| | Height | Weight |
|---|---|---|
| 0 | 73.85 | 241.89 |
| 1 | 68.78 | 162.31 |
| 2 | 74.11 | 212.74 |
| ... | ... | ... |
| 4997 | 67.01 | 199.20 |
| 4998 | 71.56 | 185.91 |
| 4999 | 70.35 | 198.90 |

5000 rows × 2 columns

In [4]:

```
height_and_weight.plot(kind='hist', density=True, ec='w', bins=60, alpha=0.8, figsize=(10, 5));
```

Both variables are roughly normal. What *benefit* is there to knowing that the two distributions are roughly normal?

Let's suppose, as is often the case, that we don't have access to the entire distribution of heights, just the mean and SD.

In [5]:

```
heights = height_and_weight.get('Height')
height_mean = heights.mean()
height_mean
```

Out[5]:

69.02634590621741

In [6]:

```
height_std = np.std(heights)
height_std
```

Out[6]:

2.863075878119538

Using just this information, we can estimate the proportion of heights between 65 and 70 inches:

1. Convert 65 to standard units.
2. Convert 70 to standard units.
3. Use `scipy.stats.norm.cdf` to find the area between (1) and (2).

In [7]:

```
left = (65 - height_mean) / height_std
left
```

Out[7]:

-1.406300802918961

In [8]:

```
right = (70 - height_mean) / height_std
right
```

Out[8]:

0.34007275225345374

In [9]:

```
normal_area(left, right)
```

In [10]:

```
from scipy import stats
approximation = stats.norm.cdf(right) - stats.norm.cdf(left)
approximation
```

Out[10]:

0.5532817187111831

Since we have access to the entire set of heights, we can compute the true proportion of heights between 65 and 70 inches.

In [11]:

```
# True proportion of values between 65 and 70.
height_and_weight[
    (height_and_weight.get('Height') >= 65) &
    (height_and_weight.get('Height') <= 70)
].shape[0] / height_and_weight.shape[0]
```

Out[11]:

0.554

In [12]:

```
# Approximation using the standard normal curve.
approximation
```

Out[12]:

0.5532817187111831

Pretty good for an approximation! 🤩

- Last class, we looked at Chebyshev's inequality, which states that the proportion of values within $z$ SDs of the mean is **at least** $1 - \frac{1}{z^2}$.
    - This works for **any** distribution, and is a lower bound.

- If we know that the distribution is normal, we can be even more specific!

| Range | All Distributions (via Chebyshev's inequality) | Normal Distribution |
|---|---|---|
| mean $\pm \ 1$ SD | $\geq 0\%$ | $\approx 68\%$ |
| mean $\pm \ 2$ SDs | $\geq 75\%$ | $\approx 95\%$ |
| mean $\pm \ 3$ SDs | $\geq 88.8\%$ | $\approx 99.73\%$ |

Remember, the values on the $x$-axis for the standard normal curve are in standard units. So, the proportion of values within 1 SD of the mean is the area under the standard normal curve between -1 and 1.

In [13]:

```
normal_area(-1, 1, bars=True)
```

In [14]:

```
stats.norm.cdf(1) - stats.norm.cdf(-1)
```

Out[14]:

0.6826894921370859

This means that if a variable follows a normal distribution, approximately 68% of values will be within 1 SD of the mean.

In [15]:

```
normal_area(-2, 2, bars=True)
```

In [16]:

```
stats.norm.cdf(2) - stats.norm.cdf(-2)
```

Out[16]:

0.9544997361036416

- If a variable follows a normal distribution, approximately 95% of values will be within 2 SDs of the mean.
- Consequently, 5% of values will be outside this range.
- Since the normal curve is symmetric,
- 2.5% of values will be more than 2 SDs above the mean, and
- 2.5% of values will be more than 2 SDs below the mean.
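This symmetry can be verified directly with `scipy.stats.norm.cdf` (a quick check, not code from the lecture):

```python
from scipy import stats

# Tail areas beyond 2 SDs on each side. By symmetry they are equal,
# and each is roughly 2.5% of the total area.
lower_tail = stats.norm.cdf(-2)
upper_tail = 1 - stats.norm.cdf(2)
lower_tail, upper_tail
```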

| Range | All Distributions (via Chebyshev's inequality) | Normal Distribution |
|---|---|---|
| mean $\pm \ 1$ SD | $\geq 0\%$ | $\approx 68\%$ |
| mean $\pm \ 2$ SDs | $\geq 75\%$ | $\approx 95\%$ |
| mean $\pm \ 3$ SDs | $\geq 88.8\%$ | $\approx 99.73\%$ |

The percentages for normal distributions above are approximations, not lower bounds.

**Important**: They apply to all normal distributions, standardized or not. This is because all normal distributions are just stretched and shifted versions of the standard normal distribution.
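One way to see this in code (a sketch; `loc` and `scale` are scipy's parameter names for the mean and SD): `stats.norm.cdf` can work with a non-standard normal distribution directly, and the result matches what we get by first converting to standard units. The mean and SD below are the height values from earlier, rounded.

```python
from scipy import stats

# Height mean and SD from earlier in the lecture (rounded).
height_mean = 69.03
height_std = 2.86

# Area to the left of 70 inches, computed two equivalent ways.
direct = stats.norm.cdf(70, loc=height_mean, scale=height_std)
via_standard_units = stats.norm.cdf((70 - height_mean) / height_std)
direct, via_standard_units
```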

- Last class, we mentioned that the standard normal curve has inflection points at $z = \pm 1$.
- An inflection point is where a curve goes from "opening down" 🙁 to "opening up" 🙂.

In [17]:

```
normal_area(-1, 1)
```

- We know that the $x$-axis of the standard normal curve represents standard units, so the inflection points are at 1 standard deviation above and below the mean.

- This means that if a distribution is roughly normal, we can determine its standard deviation by finding the distance between each inflection point and the mean.

Remember: The distribution of heights is roughly normal, but it is *not* a *standard* normal distribution.

In [18]:

```
height_and_weight.plot(kind='hist', y='Height', density=True, ec='w', bins=40, alpha=0.8, figsize=(10, 5));
plt.xticks(np.arange(60, 78, 2));
```

- The center appears to be around 69.
- The inflection points appear to be around 66 and 72.
- So, the standard deviation is roughly 72 - 69 = 3.

In [19]:

```
np.std(height_and_weight.get('Height'))
```

Out[19]:

2.863075878119538

The distribution of flight delays that we've been looking at is *not* roughly normal.

In [20]:

```
delays = bpd.read_csv('data/delays.csv')
delays.plot(kind='hist', y='Delay', bins=np.arange(-20.5, 210, 5), density=True, ec='w', figsize=(10, 5), title='Population Distribution of Flight Delays')
plt.xlabel('Delay (minutes)');
```

In [21]:

```
delays.get('Delay').describe()
```

Out[21]:

```
count    13825.00
mean        16.66
std         39.48
             ...
50%          2.00
75%         18.00
max        580.00
Name: Delay, Length: 8, dtype: float64
```

- Before we started discussing center, spread, and the normal distribution, our focus was on bootstrapping.

- We used bootstrapping to estimate **the distribution of a sample statistic (e.g. the sample mean or sample median)**, using just a single sample.

- We did this to construct confidence intervals for a population parameter.

- **Important**: For now, we'll suppose our parameter of interest is the population mean, **so we're interested in estimating the distribution of the sample mean**.

- What we're soon going to discover is a technique for **finding the distribution of the sample mean and creating a confidence interval, without needing to bootstrap**. Think of this as a shortcut to bootstrapping.

Since we have access to the population of flight delays, let's remind ourselves what the distribution of the sample mean looks like by drawing samples repeatedly from the population.

- This is **not bootstrapping**.
- This is also **not practical**. If we had access to a population, we wouldn't need to understand the distribution of the sample mean – we'd be able to compute the population mean directly.

In [22]:

```
sample_means = np.array([])
repetitions = 2000
for i in np.arange(repetitions):
    sample = delays.sample(500)
    sample_mean = sample.get('Delay').mean()
    sample_means = np.append(sample_means, sample_mean)
sample_means
```

Out[22]:

array([17.06, 16.39, 16.58, ..., 12.78, 16.21, 15.36])

In [23]:

```
bpd.DataFrame().assign(sample_means=sample_means).plot(kind='hist', density=True, ec='w', alpha=0.65, bins=20, figsize=(10, 5));
plt.scatter([sample_means.mean()], [-0.005], marker='^', color='green', s=250)
plt.axvline(sample_means.mean(), color='green', label=f'mean={np.round(sample_means.mean(), 2)}', linewidth=4)
plt.xlim(5, 30)
plt.ylim(-0.013, 0.26)
plt.legend();
```

Notice that this distribution is roughly normal, even though the population distribution was not! This distribution is centered at the population mean.

The Central Limit Theorem (CLT) says that the probability distribution of the **sum or mean** of a large random sample drawn with replacement will be roughly normal, regardless of the distribution of the population from which the sample is drawn.

While the formulas we're about to introduce only work for sample means, it's important to remember that the statement above also holds true for sample sums.
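A minimal simulation sketch of the sample-sum version (not from the lecture; it uses a skewed exponential population as a stand-in for the flight delays):

```python
import numpy as np

np.random.seed(42)

# A very non-normal (right-skewed) population.
population = np.random.exponential(scale=16, size=100_000)

# Sums of many large samples drawn with replacement.
sample_sums = np.array([
    np.random.choice(population, 500, replace=True).sum()
    for _ in range(2000)
])

# The distribution of sample_sums is roughly bell-shaped and centered
# near 500 times the population mean, even though the population is skewed.
sample_sums.mean(), 500 * population.mean()
```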

**Shape**: The CLT says that the distribution of the sample mean is roughly normal, no matter what the population looks like.

**Center**: This distribution is centered at the population mean.

**Spread**: What is the standard deviation of the distribution of the sample mean? How is it impacted by the sample size?

The function `sample_mean_delays` takes in an integer `sample_size`, and:

1. Takes a sample of size `sample_size` directly from the population.
2. Computes the mean of the sample.
3. Repeats steps 1 and 2 above 2000 times, and returns an array of the resulting means.

In [24]:

```
def sample_mean_delays(sample_size):
    sample_means = np.array([])
    for i in np.arange(2000):
        sample = delays.sample(sample_size)
        sample_mean = sample.get('Delay').mean()
        sample_means = np.append(sample_means, sample_mean)
    return sample_means
```

Let's call `sample_mean_delays` on several values of `sample_size`.

In [25]:

```
sample_means = {}
sample_sizes = [5, 10, 50, 100, 200, 400, 800, 1600]
for size in sample_sizes:
    sample_means[size] = sample_mean_delays(size)
```

Let's look at the resulting distributions.

In [26]:

```
# Plot the resulting distributions.
bins = np.arange(5, 30, 0.5)
for size in sample_sizes:
    bpd.DataFrame().assign(data=sample_means[size]).plot(kind='hist', bins=bins, density=True, ec='w',
                                                         title=f'Distribution of the Sample Mean for Samples of Size {size}',
                                                         figsize=(8, 4))
    plt.legend('');
    plt.show()
    time.sleep(1.5)
    if size != sample_sizes[-1]:
        clear_output()
```

What do you notice? 🤔

- As we increase our sample size, the distribution of the sample mean gets narrower, and so its standard deviation decreases.
- Can we determine exactly how much it decreases by?

In [27]:

```
# Compute the standard deviation of each distribution.
sds = np.array([])
for size in sample_sizes:
    sd = np.std(sample_means[size])
    sds = np.append(sds, sd)
sds
```

Out[27]:

array([17.78, 12.25, 5.69, 3.93, 2.77, 1.99, 1.36, 0.91])

In [28]:

```
observed = bpd.DataFrame().assign(
    SampleSize=sample_sizes,
    StandardDeviation=sds
)
observed.plot(kind='scatter', x='SampleSize', y='StandardDeviation', s=70,
              title="Standard Deviation of the Distribution of the Sample Mean vs. Sample Size", figsize=(10, 5));
```