separate, unite
separate(__data, col, into, sep='[^a-zA-Z0-9]', remove=True, convert=False, extra='warn', fill='warn')
    Split col into len(into) pieces. Return DataFrame with a column added for each piece.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `__data` |  | a DataFrame. | required |
| `col` |  | name of column to split (either string, or siu expression). | required |
| `into` |  | names of resulting columns holding each entry in split. | required |
| `sep` |  | regular expression used to split col. Passed to col.str.split method. | `'[^a-zA-Z0-9]'` |
| `remove` |  | whether to remove col from the returned DataFrame. | `True` |
| `convert` |  | whether to attempt to convert the split columns to numerics. | `False` |
| `extra` |  | what to do when more splits than into names. One of ("warn", "drop" or "merge"). "warn" produces a warning and discards the extra pieces; "drop" discards them silently; "merge" is currently not implemented. | `'warn'` |
| `fill` |  | what to do when fewer splits than into names. Currently not implemented (too few splits raises a ValueError). | `'warn'` |
Examples:
>>> import pandas as pd
>>> from siuba import separate
>>> df = pd.DataFrame({"label": ["S1-1", "S2-2"]})
Split into two columns:
>>> separate(df, "label", into = ["season", "episode"])
  season episode
0     S1       1
1     S2       2
Split, and try to convert columns to numerics:
>>> separate(df, "label", into = ["season", "episode"], convert = True)
  season  episode
0     S1        1
1     S2        2
Source code in siuba/dply/verbs.py
          @singledispatch2(pd.DataFrame)
def separate(__data, col, into, sep = r"[^a-zA-Z0-9]",
             remove = True, convert = False,
             extra = "warn", fill = "warn"
            ):
    """Split col into len(into) pieces. Return DataFrame with a column added for each piece.

    The input DataFrame is not modified; a copy is returned.

    Parameters
    ----------
    __data:
        a DataFrame.
    col:
        name of column to split (either string, or siu expression).
    into:
        names of resulting columns holding each entry in split.
    sep:
        regular expression used to split col. Passed to col.str.split method.
    remove:
        whether to remove col from the returned DataFrame. The column is kept
        when its name is reused as one of the into names.
    convert:
        whether to attempt to convert the split columns to numerics. Columns
        that cannot be converted are left unchanged.
    extra:
        what to do when more splits than into names.  One of ("warn", "drop" or "merge").
        "warn" produces a UserWarning and discards the extra pieces; "drop"
        discards them silently; "merge" is currently not implemented.
        Only consulted when there are too many splits.
    fill:
        what to do when fewer splits than into names. Currently not implemented:
        the argument is ignored and too few splits raises a ValueError.

    Raises
    ------
    ValueError
        if splitting produces fewer columns than len(into), or if there are
        too many splits and extra is not one of the accepted values.
    NotImplementedError
        if there are too many splits and extra is "merge".

    Examples
    --------
    >>> import pandas as pd
    >>> from siuba import separate
    >>> df = pd.DataFrame({"label": ["S1-1", "S2-2"]})

    Split into two columns:

    >>> separate(df, "label", into = ["season", "episode"])
      season episode
    0     S1       1
    1     S2       2

    Split, and try to convert columns to numerics:

    >>> separate(df, "label", into = ["season", "episode"], convert = True)
      season  episode
    0     S1        1
    1     S2        2
    """
    n_into = len(into)
    col_name = simple_varname(col)

    # splitting column ----
    # expand = True gives one column per piece; rows with fewer pieces than
    # the widest row are padded with missing values on the right.
    all_splits = __data[col_name].str.split(sep, expand = True)
    n_split_cols = len(all_splits.columns)

    # handling too many or too few splits ----
    if n_split_cols < n_into:
        # too few columns (the fill argument is not consulted yet)
        raise ValueError("Expected %s split cols, found %s" %(n_into, n_split_cols))
    elif n_split_cols > n_into:
        # Extra argument controls how we deal with too many splits
        if extra == "warn":
            # Look at every surplus column (not just the first one) and report
            # the positional row numbers that actually hold a surplus piece.
            df_extra_cols = all_splits.iloc[:, n_into:].reset_index(drop=True)
            bad_rows = df_extra_cols.dropna(how="all")
            n_extra = bad_rows.shape[0]
            # Adjacent f-strings are concatenated verbatim, so each sentence
            # needs its own trailing space.
            warnings.warn(
                f"Expected {n_into} pieces. "
                f"Additional pieces discarded in {n_extra} rows. "
                f"Row numbers: {bad_rows.index.values}",
                UserWarning
            )
        elif extra == "drop":
            # surplus pieces are simply never copied into the output below
            pass
        elif extra == "merge":
            raise NotImplementedError("TODO: separate extra = 'merge'")
        else:
            raise ValueError("Invalid extra argument: %s" %extra)

    # create new columns in data ----
    out = __data.copy()
    for ii, name in enumerate(into):
        out[name] = all_splits.iloc[:, ii]

    # attempt to convert columns to numeric ----
    if convert:
        # TODO: better strategy here?
        for k in into:
            try:
                out[k] = pd.to_numeric(out[k])
            except ValueError:
                # best-effort conversion: leave non-numeric columns as they are
                pass

    # keep the source column if one of the new columns reused its name
    if remove and col_name not in into:
        return out.drop(columns = col_name)

    return out
unite(__data, col, *args, sep='_', remove=True)
    Combine multiple columns into a single column. Return DataFrame with that column included.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `__data` |  | a DataFrame | required |
| `col` |  | name of the to-be-created column (string). | required |
| `*args` |  | names of each column to combine. | `()` |
| `sep` |  | separator joining each column being combined. | `'_'` |
| `remove` |  | whether to remove the combined columns from the returned DataFrame. | `True` |
Source code in siuba/dply/verbs.py
          @singledispatch2(pd.DataFrame)
def unite(__data, col, *args, sep = "_", remove = True):
    """Combine multiple columns into a single column. Return DataFrame with that column included.

    The input DataFrame is not modified; a copy is returned.

    Parameters
    ----------
    __data:
        a DataFrame
    col:
        name of the to-be-created column (string).
    *args:
        names of each column to combine. At least one is required.
    sep:
        separator joining each column being combined.
    remove:
        whether to remove the combined columns from the returned DataFrame.

    Raises
    ------
    ValueError
        if an entry of *args does not resolve to a column name, if a named
        column is not in __data, or if col is already a column in __data.
    TypeError
        if no columns are given in *args.
    """
    unite_col_names = list(map(simple_varname, args))
    out_col_name = simple_varname(col)

    # validations ----
    # NOTE(review): simple_varname presumably returns None for anything that
    # is not a plain column reference; confirm against its definition.
    if None in unite_col_names:
        raise ValueError("*args must be string, or simple column name, e.g. _.col_name")

    missing_cols = set(unite_col_names) - set(__data.columns)
    if missing_cols:
        raise ValueError("columns %s not in DataFrame.columns" %missing_cols)

    unite_cols = [_coerce_to_str(__data[col_name]) for col_name in unite_col_names]

    if out_col_name in __data:
        raise ValueError("col argument %s already a column in data" % out_col_name)

    if not unite_cols:
        # reduce() on an empty list fails with an opaque TypeError; keep the
        # exception type but say what is actually wrong.
        raise TypeError("unite requires at least one column name in *args")

    # perform unite ----
    # TODO: this is probably not very efficient. Maybe try with transform or apply?
    res = reduce(lambda x,y: x + sep + y, unite_cols)

    out_df = __data.copy()
    out_df[out_col_name] = res

    if remove:
        return out_df.drop(columns = unite_col_names)

    return out_df