Skip to content

Verb unite

unite(__data, col, *args, sep='_', remove=True)

Combine multiple columns into a single column. Returns a DataFrame that includes the new column.

Parameters:

Name Type Description Default
__data

a DataFrame

required
col

name of the to-be-created column (string).

required
*args

names of each column to combine.

()
sep

separator joining each column being combined.

'_'
remove

whether to remove the combined columns from the returned DataFrame.

True
Source code in siuba/dply/verbs.py
@singledispatch2(pd.DataFrame)
def unite(__data, col, *args, sep = "_", remove = True):
    """Combine multiple columns into a single column.

    Each column named in ``*args`` is passed through ``_coerce_to_str`` and the
    results are joined element-wise, left to right, with ``sep`` between them.
    The input DataFrame is not modified; the result is built on a copy.

    Parameters
    ----------
    __data:
        a DataFrame
    col:
        name of the to-be-created column (string, or simple column name,
        e.g. _.col_name).
    *args:
        names of each column to combine (strings, or simple column names).
        At least one is required.
    sep:
        separator joining each column being combined.
    remove:
        whether to remove the combined columns from the returned DataFrame.

    Returns
    -------
    A copy of ``__data`` with the new column ``col`` added and, when
    ``remove`` is true, the columns listed in ``*args`` dropped.

    Raises
    ------
    ValueError
        If ``col`` or any entry of ``*args`` is not a string or simple column
        name, if a column in ``*args`` is not in ``__data``, or if ``col`` is
        already a column in ``__data``.
    TypeError
        If no columns to combine are given.

    """
    unite_col_names = list(map(simple_varname, args))
    out_col_name = simple_varname(col)

    # validations ----
    # All checks run up front, before any column is coerced or copied.
    if None in unite_col_names:
        raise ValueError("*args must be string, or simple column name, e.g. _.col_name")

    missing_cols = set(unite_col_names) - set(__data.columns)
    if missing_cols:
        raise ValueError("columns %s not in DataFrame.columns" %missing_cols)

    # The *args check above relies on simple_varname giving None for input it
    # cannot resolve; apply the same rule to col so that an invalid name does
    # not silently create a column labelled None.
    if out_col_name is None:
        raise ValueError("col must be string, or simple column name, e.g. _.col_name")

    if out_col_name in __data:
        raise ValueError("col argument %s already a column in data" % out_col_name)

    # reduce() without an initial value raises TypeError on an empty sequence;
    # keep that exception type but say what is actually wrong.
    if not unite_col_names:
        raise TypeError("unite() requires at least one column name in *args")

    # perform unite ----
    unite_cols = [_coerce_to_str(__data[col_name]) for col_name in unite_col_names]

    # TODO: this is probably not very efficient. Maybe try with transform or apply?
    res = reduce(lambda x,y: x + sep + y, unite_cols)

    out_df = __data.copy()
    out_df[out_col_name] = res

    if remove:
        return out_df.drop(columns = unite_col_names)

    return out_df