gather, spread

`gather(__data, key='key', value='value', *args, drop_na=False, convert=False)`

Reshape a table by gathering it into long format.

Parameters:

Name	Type	Description	Default
`__data`		The input data.	required
`key`		Name of the key (or measure) column, which holds the names of the columns that were turned into rows.	`'key'`
`value`		Name of the value column, which holds the values from the columns that were turned into rows.	`'value'`
`*args`		A selection of columns. If unspecified, all columns are selected. Any arguments you could pass to the select() verb are allowed.	`()`
`drop_na`	`bool`	Whether to remove any rows where the value column is NA.	`False`
`convert`	`bool`	Not yet implemented; passing a truthy value raises `NotImplementedError`.	`False`

Examples:

>>> import pandas as pd
>>> from siuba import _, gather

>>> df = pd.DataFrame({"id": ["a", "b"], "x": [1, 2], "y": [3, None]})

The code below gathers in all columns, except id:

>>> gather(df, "key", "value", -_.id)
  id key  value
0  a   x    1.0
1  b   x    2.0
2  a   y    3.0
3  b   y    NaN

>>> gather(df, "measure", "result", _.x, _.y, drop_na=True)
  id measure  result
0  a       x     1.0
1  b       x     2.0
2  a       y     3.0

Source code in siuba/dply/verbs.py

@singledispatch2((pd.DataFrame, DataFrameGroupBy))
def gather(__data, key = "key", value = "value", *args, drop_na = False, convert = False):
    """Reshape a wide table into long format by stacking columns as key/value pairs.

    Parameters
    ----------
    __data:
        The table to reshape.
    key:
        Name of the output column that records which original column each
        row came from.
    value:
        Name of the output column that holds the values taken from the
        gathered columns.
    *args:
        The columns to gather. When omitted, every column is gathered.
        Anything accepted by the select() verb may be used here.
    drop_na: bool
        If True, rows whose value is NA are removed and the index is
        renumbered from zero.
    convert: bool
        Not yet implemented; a truthy value raises NotImplementedError.

    Returns
    -------
    The melted table, or the input unchanged when the selection matches
    no columns.

    Examples
    --------

    >>> import pandas as pd
    >>> from siuba import _, gather

    >>> df = pd.DataFrame({"id": ["a", "b"], "x": [1, 2], "y": [3, None]})

    Gather every column except id:

    >>> gather(df, "key", "value", -_.id)
      id key  value
    0  a   x    1.0
    1  b   x    2.0
    2  a   y    3.0
    3  b   y    NaN

    Gather x and y explicitly, discarding rows with a missing value:

    >>> gather(df, "measure", "result", _.x, _.y, drop_na=True)
      id measure  result
    0  a       x     1.0
    1  b       x     2.0
    2  a       y     3.0

    """
    if convert:
        raise NotImplementedError("convert not yet implemented")

    # TODO: copied from nest and select
    # An empty selection means "gather everything".
    requested = args if args else __data.columns
    selected = var_select(__data.columns, *var_create(*requested))

    if not selected:
        # Nothing to gather: hand the input back untouched.
        return __data

    # Columns outside the selection are carried along as identifiers,
    # in their original order.
    kept_cols = [name for name in __data.columns if name not in selected]

    melted = pd.melt(
        __data,
        id_vars = kept_cols,
        value_vars = list(selected),
        var_name = key,
        value_name = value,
    )

    if not drop_na:
        return melted

    has_value = melted[value].notna()
    return melted[has_value].reset_index(drop = True)

`spread(__data, key, value, fill=None, reset_index=True)`

Reshape table by spreading it out to wide format.