from setup import ___
from siuba import *
from plotnine import *
from music_top200 import music_top200, track_features
Mutate
Click here to open the slides full screen.
Exercise 0:
Delete the `#` symbol on the line `# music_top200`. This symbol starts a comment, and causes everything to the right of it to be ignored.
# I am a helpful note. I am not code!
# music_top200
Exercise 1:
Currently, the `duration` column is measured in seconds.
- Delete the `#` symbol in the code (this uncomments the line).
- Complete the code to convert duration to milliseconds.
hint
Delete the `#` symbol, which starts a comment. When it is the first non-blank character on a line, it stops that line from being run.
1 second is 1000 milliseconds. So you would multiply the duration by 1000.
(music_top200
  # >> mutate(___ = ___)
)
country | position | track_name | artist | streams | duration | continent | |
---|---|---|---|---|---|---|---|
0 | Argentina | 1 | Tusa | KAROL G | 1858666 | 200.960 | Americas |
1 | Argentina | 2 | Tattoo | Rauw Alejandro | 1344382 | 202.887 | Americas |
2 | Argentina | 3 | Hola - Remix | Dalex | 1330011 | 249.520 | Americas |
... | ... | ... | ... | ... | ... | ... | ... |
12397 | South Africa | 198 | Black And White | Niall Horan | 11771 | 193.090 | Africa |
12398 | South Africa | 199 | When I See U | Fantasia | 11752 | 217.347 | Africa |
12399 | South Africa | 200 | Psycho! | MASN | 11743 | 197.217 | Africa |
12400 rows × 7 columns
# Add duration_ms: duration (in seconds) multiplied by 1000.
(
    music_top200
    >> mutate(duration_ms=_.duration * 1000)
)
country | position | track_name | artist | streams | duration | continent | duration_ms | |
---|---|---|---|---|---|---|---|---|
0 | Argentina | 1 | Tusa | KAROL G | 1858666 | 200.960 | Americas | 200960.0 |
1 | Argentina | 2 | Tattoo | Rauw Alejandro | 1344382 | 202.887 | Americas | 202887.0 |
2 | Argentina | 3 | Hola - Remix | Dalex | 1330011 | 249.520 | Americas | 249520.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
12397 | South Africa | 198 | Black And White | Niall Horan | 11771 | 193.090 | Africa | 193090.0 |
12398 | South Africa | 199 | When I See U | Fantasia | 11752 | 217.347 | Africa | 217347.0 |
12399 | South Africa | 200 | Psycho! | MASN | 11743 | 197.217 | Africa | 197217.0 |
12400 rows × 8 columns
Exercise 2:
Uncomment the mutate in the pipe below. Can you fix the two errors in it?
It should return data with a new column on the right, that divides position by 200.
(music_top200
  # >> mutate(new position = position / 200)
)
country | position | track_name | artist | streams | duration | continent | |
---|---|---|---|---|---|---|---|
0 | Argentina | 1 | Tusa | KAROL G | 1858666 | 200.960 | Americas |
1 | Argentina | 2 | Tattoo | Rauw Alejandro | 1344382 | 202.887 | Americas |
2 | Argentina | 3 | Hola - Remix | Dalex | 1330011 | 249.520 | Americas |
... | ... | ... | ... | ... | ... | ... | ... |
12397 | South Africa | 198 | Black And White | Niall Horan | 11771 | 193.090 | Africa |
12398 | South Africa | 199 | When I See U | Fantasia | 11752 | 217.347 | Africa |
12399 | South Africa | 200 | Psycho! | MASN | 11743 | 197.217 | Africa |
12400 rows × 7 columns
# Add new_position: position divided by 200.
(
    music_top200
    >> mutate(new_position=_.position / 200)
)
country | position | track_name | artist | streams | duration | continent | new_position | |
---|---|---|---|---|---|---|---|---|
0 | Argentina | 1 | Tusa | KAROL G | 1858666 | 200.960 | Americas | 0.005 |
1 | Argentina | 2 | Tattoo | Rauw Alejandro | 1344382 | 202.887 | Americas | 0.010 |
2 | Argentina | 3 | Hola - Remix | Dalex | 1330011 | 249.520 | Americas | 0.015 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
12397 | South Africa | 198 | Black And White | Niall Horan | 11771 | 193.090 | Africa | 0.990 |
12398 | South Africa | 199 | When I See U | Fantasia | 11752 | 217.347 | Africa | 0.995 |
12399 | South Africa | 200 | Psycho! | MASN | 11743 | 197.217 | Africa | 1.000 |
12400 rows × 8 columns
Exercise 3:
The mutate below uses code from a future chapter to calculate how many letters are in each artist’s name.
Among the artists with the shortest name, which one has the most streams?
hint
You’ll need to use an arrange in the pipe with 2 arguments.
# Add artist_length: the number of characters in each artist's name.
(
    music_top200
    >> mutate(artist_length=_.artist.str.len())
)
country | position | track_name | artist | streams | duration | continent | artist_length | |
---|---|---|---|---|---|---|---|---|
0 | Argentina | 1 | Tusa | KAROL G | 1858666 | 200.960 | Americas | 7 |
1 | Argentina | 2 | Tattoo | Rauw Alejandro | 1344382 | 202.887 | Americas | 14 |
2 | Argentina | 3 | Hola - Remix | Dalex | 1330011 | 249.520 | Americas | 5 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
12397 | South Africa | 198 | Black And White | Niall Horan | 11771 | 193.090 | Africa | 11 |
12398 | South Africa | 199 | When I See U | Fantasia | 11752 | 217.347 | Africa | 8 |
12399 | South Africa | 200 | Psycho! | MASN | 11743 | 197.217 | Africa | 4 |
12400 rows × 8 columns
# Compute artist_length, then sort by it (shortest names first),
# breaking ties by streams from highest to lowest.
(
    music_top200
    >> mutate(artist_length=_.artist.str.len())
    >> arrange(_.artist_length, -_.streams)
)
country | position | track_name | artist | streams | duration | continent | artist_length | |
---|---|---|---|---|---|---|---|---|
5256 | Indonesia | 57 | Sweet Night | V | 529806 | 214.259 | Asia | 1 |
9666 | Philippines | 67 | Sweet Night | V | 313660 | 214.259 | Asia | 1 |
5880 | India | 81 | Sweet Night | V | 230745 | 214.259 | Asia | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
8366 | Mexico | 167 | Escondidos | La Adictiva Banda San José de Mesillas | 635158 | 191.893 | Americas | 38 |
11193 | El Salvador | 194 | En Peligro de Extinción | La Adictiva Banda San José de Mesillas | 11447 | 234.198 | Americas | 38 |
180 | Argentina | 181 | Un Poco de Amor Francés | Patricio Rey y sus Redonditos de Ricota | 178678 | 206.200 | Americas | 39 |
12400 rows × 8 columns