Skip to content

Verb spread

spread(__data, key, value, fill=None, reset_index=True)

Reshape table by spreading it out to wide format.

Parameters:

Name Type Description Default
__data

The input data.

required
key

Column whose values will be used as new column names.

required
value

Column whose values will fill the new column entries.

required
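fill

Value to set for any missing values. By default keeps them as missing values.

None
reset_index

Whether to move the id columns (all columns other than key and value) out of the index and back into regular columns of the result.

True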

Examples:

>>> import pandas as pd                                                
>>> from siuba import _, gather, spread
>>> df = pd.DataFrame({"id": ["a", "b"], "x": [1, 2], "y": [3, None]}) 
>>> long = gather(df, "key", "value", -_.id, drop_na=True)
>>> long
  id key  value
0  a   x    1.0
1  b   x    2.0
2  a   y    3.0
>>> spread(long, "key", "value")
  id    x    y
0  a  1.0  3.0
1  b  2.0  NaN
Source code in siuba/dply/verbs.py
@singledispatch2(pd.DataFrame)
def spread(__data, key, value, fill = None, reset_index = True):
    """Reshape table by spreading it out to wide format.

    Parameters
    ----------
    __data:
        The input data.
    key:
        Column whose values will be used as new column names.
    value:
        Column whose values will fill the new column entries.
    fill:
        Value to set for any missing values. By default keeps them as missing values.
        It is applied after reshaping, so it replaces missing values that were
        already in the value column as well as those created by the reshape.
    reset_index:
        Whether to move the id columns (all columns other than key and value)
        out of the index and back into regular columns of the result.


    Examples
    --------
    >>> import pandas as pd
    >>> from siuba import _, gather, spread

    >>> df = pd.DataFrame({"id": ["a", "b"], "x": [1, 2], "y": [3, None]})

    >>> long = gather(df, "key", "value", -_.id, drop_na=True)
    >>> long
      id key  value
    0  a   x    1.0
    1  b   x    2.0
    2  a   y    3.0

    >>> spread(long, "key", "value")
      id    x    y
    0  a  1.0  3.0
    1  b  2.0  NaN

    """
    key_col = _get_single_var_select(__data.columns, key)
    val_col = _get_single_var_select(__data.columns, value)

    # every column that is neither the key nor the value identifies a row
    # of the wide result
    id_cols = [col for col in __data.columns if col not in (key_col, val_col)]

    # NOTE(review): when id_cols is empty the index has a single level, and
    # DataFrame.unstack then returns a Series rather than a DataFrame, so the
    # column handling below would not apply -- confirm whether that case
    # needs explicit support.
    wide = __data.set_index(id_cols + [key_col]).unstack(level = -1)

    if fill is not None:
        wide = wide.fillna(fill)

    # remove multi-index from both rows and cols: the columns are
    # (value column, key value) pairs, so drop the outer level and clear the
    # leftover columns name
    wide.columns = wide.columns.droplevel().rename(None)
    if reset_index:
        wide = wide.reset_index()

    return wide