Skip to content

Verb unnest

unnest(__data, key='data')

Unnest a column holding nested data (e.g. Series of lists or DataFrames).

Parameters:

Name Type Description Default
__data

A DataFrame.

required
key

The name of the column to be unnested.

'data'

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({'id': [1,2], 'data': [['a', 'b'], ['c']]})
>>> df >> unnest()
   id data
0   1    a
1   1    b
2   2    c
Source code in siuba/dply/verbs.py
@singledispatch2(pd.DataFrame)
def unnest(__data, key = "data"):
    """Unnest a column holding nested data (e.g. Series of lists or DataFrames).

    Each nested entry is expanded so that every element gets its own row,
    and the values of the remaining columns are repeated alongside it.

    Parameters
    ----------
    __data:
        A DataFrame.
    key:
        The name of the column to be unnested.

    Returns
    -------
    DataFrame
        The non-``key`` columns, repeated once per nested element, joined
        with the flattened data. The result has a fresh 0..n-1 index.

    Examples
    --------

    >>> import pandas as pd
    >>> df = pd.DataFrame({'id': [1,2], 'data': [['a', 'b'], ['c']]})
    >>> df >> unnest()
       id data
    0   1    a
    1   1    b
    2   2    c

    """
    # TODO: currently only takes key, not expressions
    nested = __data[key]
    grp_keys = list(__data.columns[__data.columns != key])

    if len(nested) == 0:
        # pd.concat raises on an empty sequence, so short-circuit and return
        # an empty frame using the same column layout as the normal path.
        return __data.loc[:, grp_keys + [key]].reset_index(drop = True)

    # number of output rows each input row expands to
    # (convert_dtype is omitted: True is already the default)
    nrows_nested = nested.apply(len)

    # Repeat row *positions* rather than index labels: label-based lookup
    # would return extra rows whenever the input index has duplicate labels.
    row_positions = pd.RangeIndex(len(__data)).repeat(nrows_nested.values)

    # flatten nested data
    data_entries = map(_convert_nested_entry, nested)
    long_data = pd.concat(data_entries, ignore_index = True)
    long_data.name = key

    # may be a better approach using a multi-index
    long_grp = (
        __data.loc[:, grp_keys]
        .iloc[row_positions.values]
        .reset_index(drop = True)
    )

    return long_grp.join(long_data)