Skip to content

count

count(__data, *args, wt=None, sort=False, name=None, **kwargs)

Summarize data with the number of rows for each grouping of data.

Parameters:

Name Type Description Default
__data

A DataFrame.

required
*args

The names of columns to be used for grouping. Passed to group_by.

()
wt

The name of a column to use as a weight for each row.

None
sort

Whether to sort the results in descending order.

False
**kwargs

Creates a new named column and uses it for grouping. Passed to group_by.

{}

Examples:

>>> from siuba import _, count, group_by, summarize, arrange
>>> from siuba.data import mtcars
>>> count(mtcars, _.cyl, high_mpg = _.mpg > 30)
   cyl  high_mpg   n
0    4     False   7
1    4      True   4
2    6     False   7
3    8     False  14

Use sort to order results by number of observations (in descending order).

>>> count(mtcars, _.cyl, sort=True)
   cyl   n
0    8  14
1    4  11
2    6   7

count is equivalent to doing a grouped summarize:

>>> mtcars >> group_by(_.cyl) >> summarize(n = _.shape[0]) >> arrange(-_.n)
   cyl   n
2    8  14
0    4  11
1    6   7
Source code in siuba/dply/verbs.py
@singledispatch2((pd.DataFrame, DataFrameGroupBy))
def count(__data, *args, wt = None, sort = False, name=None, **kwargs):
    """Summarize data with the number of rows for each grouping of data.

    Parameters
    ----------
    __data:
        A DataFrame.
    *args:
        The names of columns to be used for grouping. Passed to group_by.
    wt:
        The name of a column to use as a weight for each row. When given,
        the result holds the sum of this column per group instead of the
        number of rows.
    sort:
        Whether to sort the results in descending order of the count column.
        Groups with equal counts keep their original relative order.
    name:
        The name of the output column holding the counts. When None, the
        name is chosen by ``_check_name`` ("n", extended with more "n"s if
        that name is already taken).
    **kwargs:
        Creates a new named column and uses it for grouping. Passed to group_by.

    Returns
    -------
    A DataFrame with the grouping columns followed by the count column.

    Raises
    ------
    ValueError
        If ``wt`` is not a simple column name.

    Examples
    --------

    >>> from siuba import _, count, group_by, summarize, arrange
    >>> from siuba.data import mtcars

    >>> count(mtcars, _.cyl, high_mpg = _.mpg > 30)
       cyl  high_mpg   n
    0    4     False   7
    1    4      True   4
    2    6     False   7
    3    8     False  14

    Use sort to order results by number of observations (in descending order).

    >>> count(mtcars, _.cyl, sort=True)
       cyl   n
    0    8  14
    1    4  11
    2    6   7

    count is equivalent to doing a grouped summarize:

    >>> mtcars >> group_by(_.cyl) >> summarize(n = _.shape[0]) >> arrange(-_.n)
       cyl   n
    2    8  14
    0    4  11
    1    6   7


    """
    # A DataFrameGroupBy input always carries groups, so only a plain
    # DataFrame with no grouping arguments counts as "ungrouped".
    no_grouping_vars = not args and not kwargs and isinstance(__data, pd.DataFrame)

    # Resolve the weight column up front so an invalid wt fails before any work.
    if wt is None:
        wt_col = None
    else:
        wt_col = simple_varname(wt)
        if wt_col is None:
            raise ValueError("wt argument has to be simple column name")

    if no_grouping_vars:
        # no groups: a single row holding the row count (or the summed weights)
        total = __data.shape[0] if wt_col is None else __data[wt_col].sum()
        counts = pd.DataFrame({'tmp': [total]})
    else:
        # add = True keeps any grouping already present on __data
        grouped = group_by(__data, *args, add = True, **kwargs)
        # tally rows per group, or sum the weights per group
        tally = grouped.size() if wt_col is None else grouped[wt_col].sum()
        counts = tally.reset_index()

    # count col named, n. If that col already exists, add more "n"s...
    out_col = _check_name(name, set(counts.columns))

    # the tally is always the last column; give it its final name
    counts = counts.rename(columns = {counts.columns[-1]: out_col})

    if sort:
        # mergesort is stable, so tied groups keep their original order
        ordered = counts.sort_values(out_col, ascending = False, kind = "mergesort")
        return ordered.reset_index(drop = True)

    return counts