Verb unite
unite(__data, col, *args, sep='_', remove=True)
Combine multiple columns into a single column. Returns a DataFrame that includes the new column.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `__data` | | a DataFrame | required |
| `col` | | name of the to-be-created column (string). | required |
| `*args` | | names of each column to combine. | `()` |
| `sep` | | separator joining each column being combined. | `'_'` |
| `remove` | | whether to remove the combined columns from the returned DataFrame. | `True` |
Source code in siuba/dply/verbs.py
@singledispatch2(pd.DataFrame)
def unite(__data, col, *args, sep="_", remove=True):
    """Combine multiple columns into a single column.

    Parameters
    ----------
    __data:
        a DataFrame
    col:
        name of the to-be-created column (string, or simple column name,
        e.g. _.col_name).
    *args:
        names of each column to combine. At least one must be given.
    sep:
        separator joining each column being combined.
    remove:
        whether to remove the combined columns from the returned DataFrame.

    Returns
    -------
    A DataFrame built from a copy of ``__data`` with the new column added
    (and, when ``remove`` is true, the combined columns dropped).

    Raises
    ------
    ValueError
        If any of ``*args`` or ``col`` is not a string or simple column name,
        if a column to combine is not in ``__data``, if ``col`` is already a
        column in ``__data``, or if no columns to combine are given.
    """
    # simple_varname yields None for anything it cannot resolve to a name;
    # both the inputs and the output name go through it.
    unite_col_names = list(map(simple_varname, args))
    out_col_name = simple_varname(col)

    # validations ----
    if None in unite_col_names:
        raise ValueError("*args must be string, or simple column name, e.g. _.col_name")

    missing_cols = set(unite_col_names) - set(__data.columns)
    if missing_cols:
        raise ValueError("columns %s not in DataFrame.columns" % missing_cols)

    # Without this check an unresolvable `col` would silently create a
    # column labelled None.
    if out_col_name is None:
        raise ValueError("col must be string, or simple column name, e.g. _.col_name")

    if out_col_name in __data:
        raise ValueError("col argument %s already a column in data" % out_col_name)

    # reduce() over an empty list raises an opaque TypeError; fail early with
    # a message that names the actual problem.
    if not unite_col_names:
        raise ValueError("*args must name at least one column to combine")

    # perform unite ----
    # presumably _coerce_to_str makes each column string-valued so that `+`
    # concatenates -- confirm against its definition.
    unite_cols = [_coerce_to_str(__data[col_name]) for col_name in unite_col_names]

    # TODO: this is probably not very efficient. Maybe try with transform or apply?
    res = reduce(lambda x, y: x + sep + y, unite_cols)

    # Assign onto a copy rather than onto __data itself.
    out_df = __data.copy()
    out_df[out_col_name] = res

    if remove:
        return out_df.drop(columns=unite_col_names)

    return out_df