# Run this cell to set up packages for lecture.
from lec06_imports import *

def mystery_func_1(x, y):
    '''Returns the product of x and y, multiplied by 3.'''
    return x * y * 3

# Call the function with x=2 and y=3: (2 * 3) * 3 is 18.
w = mystery_func_1(2, 3)
print(w)

18

def first_name(full_name):
    '''Returns the part of full_name that comes before its first space.

    If full_name contains no space, the whole string is returned.
    '''
    given, _, _ = full_name.partition(' ')
    return given

first_name('Evonne Fangao')

'Evonne'

# Read the roster CSV file into `roster`, then display it.
# NOTE(review): `bpd` is presumably brought in by the star import from
# lec06_imports -- confirm.
roster = bpd.read_csv('data/roster-anon.csv')
roster

roster

roster

roster.get('name').iloc[0]

'Cindy Ubobpd'

first_name(roster.get('name').iloc[0])

'Cindy'

first_name(roster.get('name').iloc[1])

'Madeleine'

roster.get('name')

0          Cindy Ubobpd
1      Madeleine Omidge
2          Caleb Ryincn
             ...       
137        Chloe Camvgc
138       Sophie Ilvrib
139         Lani Rpcmgt
Name: name, Length: 140, dtype: object

# Call first_name on every element of the 'name' column.
# The result is a new Series of first names; roster itself is not changed.
roster.get('name').apply(first_name)

0          Cindy
1      Madeleine
2          Caleb
         ...    
137        Chloe
138       Sophie
139         Lani
Name: name, Length: 140, dtype: object

# Add a 'first' column holding each student's first name.
# The result of .assign is saved back into roster so the new column persists.
roster = roster.assign(
    first=roster.get('name').apply(first_name)
)
roster

# Count the rows for each distinct first name, with the most common names first.
roster.groupby('first').count().sort_values(by='name', ascending=False)

# Number of students per first name, sorted from most to least common.
name_counts = (
    roster
    # One group per distinct first name.
    .groupby('first')
    # Replace every remaining column with the number of rows in each group.
    .count()
    # Most common first names at the top.
    .sort_values('name', ascending=False)
    # Keep a one-column DataFrame; 'name' now holds the counts.
    .get(['name'])
)
name_counts

name_counts.get("name") > 1

first
Abraham     True
Diego       True
Audrey      True
           ...  
Diya       False
Dante      False
Zirong     False
Name: name, Length: 134, dtype: bool

# Keep only the first names that occur more than once, then draw a
# horizontal bar chart of their counts.
name_counts[name_counts.get("name") > 1].plot(kind='barh')

<Axes: ylabel='first'>

...

Ellipsis

name_counts.get('name')

first
Abraham    2
Diego      2
Audrey     2
          ..
Diya       1
Dante      1
Zirong     1
Name: name, Length: 134, dtype: int64

# Not necessarily meaningful, but doable.
name_counts.get('name').apply(abs)

first
Abraham    2
Diego      2
Audrey     2
          ..
Diya       1
Dante      1
Zirong     1
Name: name, Length: 134, dtype: int64

name_counts

name_counts.index

Index(['Abraham', 'Diego', 'Audrey', 'Sophia', 'Alex', 'Evan', 'Aaron', 'Nhan',
       'Nancy', 'Natalie',
       ...
       'Giang', 'Georgia', 'Ganya', 'Ethan', 'Enoch', 'Emma', 'Elizabeth',
       'Diya', 'Dante', 'Zirong'],
      dtype='object', name='first', length=134)

# This line fails with an AttributeError: an Index has no .apply method.
name_counts.index.apply(max)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[24], line 1
----> 1 name_counts.index.apply(max)

AttributeError: 'Index' object has no attribute 'apply'

# Turn the index (the first names) into a regular 'first' column, so it can
# be accessed with .get like any other column.
name_counts.reset_index()

# What is the max of an individual string?
# max compares the characters of the string and returns the largest one,
# e.g. 'r' for 'Abraham'.
name_counts.reset_index().get('first').apply(max)

0      r
1      o
2      y
      ..
131    y
132    t
133    r
Name: first, Length: 134, dtype: object

def first_char(in_str):
    '''Returns the first character of in_str.

    Raises an IndexError if in_str is empty.
    '''
    return in_str[0]

first_char('Aedan')

'A'

name_counts.reset_index().get('first').apply(first_char)

0      A
1      D
2      A
      ..
131    D
132    D
133    Z
Name: first, Length: 134, dtype: object

roster

# Look up the section of the student named 'Evan Flmeik': filter roster to the
# matching row(s), take the 'section' column, and grab its first value.
which_section = roster[roster.get('name') == 'Evan Flmeik'].get('section').iloc[0]
which_section

'9AM'

# Count the students whose first name is 'Evan' and who are in which_section.
first_cond = roster.get('first') == 'Evan' # A Boolean Series!
section_cond = roster.get('section') == which_section # A Boolean Series!
# & keeps only the rows where both conditions are True;
# .shape[0] is the number of rows that remain.
how_many = roster[first_cond & section_cond].shape[0]
how_many

1

def shared_first_and_section(name):
    '''Returns the number of rows in roster that have the same first name
    and section as the student whose full name is `name`.

    The count includes the student themselves. Reads the global `roster`,
    which must already have 'name', 'first', and 'section' columns. Only the
    first row matching `name` is used, so full names are assumed to be unique.
    '''
    # Locate the student: keep only the rows whose full name matches.
    student = roster[roster.get('name') == name]

    # Pull the two values to match on from the first matching row.
    target_first = student.get('first').iloc[0]
    target_section = student.get('section').iloc[0]

    # Build one Boolean Series per condition, then combine them with &.
    same_first = roster.get('first') == target_first
    same_section = roster.get('section') == target_section
    matches = roster[same_first & same_section]

    # The number of rows left after filtering is the answer.
    return matches.shape[0]

shared_first_and_section('Evan Flmeik')

1

# For every student, count how many students share their first name and
# section (each student is included in their own count), and store the
# result in a new 'shared' column.
roster = roster.assign(shared=roster.get('name').apply(shared_first_and_section))
roster

# Show only the students who share a first name and section with at least
# one other student, with the largest 'shared' values first.
roster[(roster.get('shared') >= 2)].sort_values('shared', ascending=False)

...

Ellipsis

Lecture 6, Part 1: Applying

DSC 10, Summer 2025

Agenda

Quick recap of functions

Functions are "recipes"

Applying functions to DataFrames

DSC 10 student data

Example: Common first names

Using our `first_name` function

`.apply`

Example: Common first names

Activity

`.apply` works with built-in functions, too!

Aside: Resetting the index

Example: Shared first names and sections

Another function: `shared_first_and_section`

Activity

	name	section
0	Cindy Ubobpd	9AM
1	Madeleine Omidge	11AM
2	Caleb Ryincn	11AM
...	...	...
137	Chloe Camvgc	11AM
138	Sophie Ilvrib	11AM
139	Lani Rpcmgt	11AM

	name	section	first
0	Cindy Ubobpd	9AM	Cindy
1	Madeleine Omidge	11AM	Madeleine
2	Caleb Ryincn	11AM	Caleb
...	...	...	...
137	Chloe Camvgc	11AM	Chloe
138	Sophie Ilvrib	11AM	Sophie
139	Lani Rpcmgt	11AM	Lani

	first	name
0	Abraham	2
1	Diego	2
2	Audrey	2
...	...	...
131	Diya	1
132	Dante	1
133	Zirong	1

	name	section	first	shared
75	Audrey Vmoxvk	11AM	Audrey	2
84	Alex Ihhwal	9AM	Alex	2
95	Audrey Deofem	11AM	Audrey	2
113	Alex Dxnfiv	9AM	Alex	2

Lecture 6, Part 1: Applying

DSC 10, Summer 2025

Agenda

Quick recap of functions

Functions are "recipes"

Applying functions to DataFrames

DSC 10 student data

Example: Common first names

Using our first_name function

.apply

Example: Common first names

Activity

.apply works with built-in functions, too!

Aside: Resetting the index

Example: Shared first names and sections

Another function: shared_first_and_section

Activity

Using our `first_name` function

`.apply`

`.apply` works with built-in functions, too!

Another function: `shared_first_and_section`