summarize
summarize(__data, *args, **kwargs)
    Assign variables that are single number summaries of a DataFrame.
Grouped DataFrames will produce one row for each group. Otherwise, summarize produces a DataFrame with a single row.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `__data` | a DataFrame | The data being summarized. | required |
| `**kwargs` |  | new_col_name=value pairs, where value can be a function taking a single argument for the data being operated on. | `{}` |

Examples:
>>> from siuba import _, group_by, summarize
>>> from siuba.data import cars
>>> cars >> summarize(avg = _.mpg.mean(), n = _.shape[0])
         avg   n
0  20.090625  32
>>> g_cyl = cars >> group_by(_.cyl)
>>> g_cyl >> summarize(min = _.mpg.min())
   cyl   min
0    4  21.4
1    6  17.8
2    8  10.4
>>> g_cyl >> summarize(mpg_std_err = _.mpg.std() / _.shape[0]**.5)
   cyl  mpg_std_err
0    4     1.359764
1    6     0.549397
2    8     0.684202
Source code in siuba/dply/verbs.py
          @singledispatch2(DataFrame)
def summarize(__data, *args, **kwargs):
    """Assign variables that are single number summaries of a DataFrame.

    Grouped DataFrames will produce one row for each group. Otherwise, summarize
    produces a DataFrame with a single row.

    Parameters
    ----------
    __data: a DataFrame
        The data being summarized.
    *args:
        Unnamed callables taking a single argument for the data being operated
        on. Each must return a DataFrame with exactly one row; every column of
        that result becomes a column of the output.
    **kwargs:
        new_col_name=value pairs, where value can be a function taking
        a single argument for the data being operated on. The value (or the
        function's result) must be a scalar or have length 1.

    Returns
    -------
    DataFrame
        A single-row DataFrame (index ``[0]``) with one column per summary.
        Unnamed arguments are evaluated first, then named ones, so a later
        result replaces an earlier one that uses the same column name.

    Raises
    ------
    TypeError
        If an unnamed argument is not callable.
    ValueError
        If an unnamed argument does not return a single-row DataFrame, or if
        a named argument produces a result that is neither a scalar nor of
        length 1.

    Examples
    --------
    >>> from siuba import _, group_by, summarize
    >>> from siuba.data import cars
    >>> cars >> summarize(avg = _.mpg.mean(), n = _.shape[0])
             avg   n
    0  20.090625  32

    >>> g_cyl = cars >> group_by(_.cyl)
    >>> g_cyl >> summarize(min = _.mpg.min())
       cyl   min
    0    4  21.4
    1    6  17.8
    2    8  10.4

    >>> g_cyl >> summarize(mpg_std_err = _.mpg.std() / _.shape[0]**.5)
       cyl  mpg_std_err
    0    4     1.359764
    1    6     0.549397
    2    8     0.684202

    """
    results = {}

    for ii, expr in enumerate(args):
        if not callable(expr):
            raise TypeError(
                "Unnamed arguments to summarize must be callable, but argument number "
                f"{ii} was type: {type(expr)}"
            )

        res = expr(__data)

        if not isinstance(res, DataFrame):
            # report the type of the *result*; the callable's own type says
            # nothing about why the summary was rejected
            raise ValueError(
                "Unnamed arguments to summarize must return a DataFrame, but argument "
                f"`{ii}` returned type: {type(res)}"
            )

        if len(res) != 1:
            raise ValueError(
                f"Summarize argument `{ii}` returned a DataFrame with {len(res)} rows."
                " Result must only be a single row."
            )

        # use the underlying arrays so the result's index cannot interfere
        # with construction of the final DataFrame
        for col_name in res.columns:
            results[col_name] = res[col_name].array

    for k, v in kwargs.items():
        # TODO: raise error if a named expression returns a DataFrame
        res = v(__data) if callable(v) else v

        if is_scalar(res) or len(res) == 1:
            # keep result, but use underlying array to avoid crazy index issues
            # on DataFrame construction (#138)
            results[k] = res.array if isinstance(res, pd.Series) else res
        else:
            raise ValueError(
                f"Summarize argument `{k}` must return result of length 1 or a scalar.\n\n"
                f"Result type: {type(res)}\n"
                f"Result length: {len(res)}"
            )

    # must pass index, or raises error when using all scalar values
    return DataFrame(results, index = [0])