Verb unnest
unnest(__data, key='data')
Unnest a column holding nested data (e.g. Series of lists or DataFrames).
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `__data` | | A DataFrame. | required |
| `key` | | The name of the column to be unnested. | `'data'` |
Examples:
>>> import pandas as pd
>>> df = pd.DataFrame({'id': [1,2], 'data': [['a', 'b'], ['c']]})
>>> df >> unnest()
id data
0 1 a
1 1 b
2 2 c
Source code in siuba/dply/verbs.py
@singledispatch2(pd.DataFrame)
def unnest(__data, key = "data"):
    """Unnest a column holding nested data (e.g. Series of lists or DataFrames).

    Every row of ``__data`` is repeated once per element of its nested entry
    in column ``key``; the flattened entries are then joined alongside the
    remaining columns. The result has a fresh 0..n-1 index.

    Parameters
    ----------
    __data:
        A DataFrame.
    key:
        The name of the column to be unnested.

    Returns
    -------
    A DataFrame with the non-nested columns repeated to match the flattened
    nested data.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({'id': [1,2], 'data': [['a', 'b'], ['c']]})
    >>> df >> unnest()
       id data
    0   1    a
    1   1    b
    2   2    c

    """
    # TODO: currently only takes key, not expressions
    nested = __data[key]

    # how many output rows each input row expands into
    nrows_nested = nested.apply(len)

    # Repeat rows by *position*, not by index label: with duplicate labels a
    # label-based lookup would return every matching row for each repeat and
    # misalign the grouping columns with the flattened data.
    row_positions = pd.RangeIndex(len(__data)).repeat(nrows_nested)

    # every column except the one being unnested
    grp_keys = list(__data.columns[__data.columns != key])

    # flatten nested data
    # NOTE(review): _convert_nested_entry is defined elsewhere; presumably it
    # turns each entry into a Series or DataFrame that pd.concat can stack.
    data_entries = [_convert_nested_entry(entry) for entry in nested]
    long_data = pd.concat(data_entries, ignore_index = True)
    long_data.name = key

    # may be a better approach using a multi-index
    long_grp = (
        __data.loc[:, grp_keys]
        .iloc[row_positions]
        .reset_index(drop = True)
    )

    return long_grp.join(long_data)