`tea_tasting.aggr` #

Module for working with aggregated statistics: count, mean, var, cov.

`Aggregates(count_=None, mean_={}, var_={}, cov_={})` #

Bases: ReprMixin

Aggregated statistics.

Parameters:

Name	Type	Description	Default
`count_`	`int \| None`	Sample size (number of observations).	`None`
`mean_`	`dict[str, float \| int]`	Dictionary of sample means with variable names as keys.	`{}`
`var_`	`dict[str, float \| int]`	Dictionary of sample variances with variable names as keys.	`{}`
`cov_`	`dict[tuple[str, str], float \| int]`	Dictionary of sample covariances with pairs of variable names as keys.	`{}`

Source code in src/tea_tasting/aggr.py

def __init__(
    self,
    count_: int | None = None,
    mean_: dict[str, float | int] = {},  # noqa: B006
    var_: dict[str, float | int] = {},  # noqa: B006
    cov_: dict[tuple[str, str], float | int] = {},  # noqa: B006
) -> None:
    """Aggregated statistics.

    The dictionaries passed in are copied, so the instance never shares state
    with the caller's objects or with the `{}` defaults of this signature.

    Args:
        count_: Sample size (number of observations).
        mean_: Dictionary of sample means with variable names as keys.
        var_: Dictionary of sample variances with variable names as keys.
        cov_: Dictionary of sample covariances with pairs of variable names as keys.
            Every key is passed through `_sorted_tuple`, the same helper that
            `cov` applies to its arguments before the lookup.
    """
    self.count_ = count_
    # Shallow copies: the `{}` defaults are created once at function definition
    # time, so storing them directly would let a mutation of one instance's
    # `mean_`/`var_` leak into every other default-constructed instance.
    self.mean_ = dict(mean_)
    self.var_ = dict(var_)
    # Always a fresh dict; keys are normalized by `_sorted_tuple`.
    self.cov_ = {_sorted_tuple(*k): v for k, v in cov_.items()}

`count()` #

Sample size (number of observations).

Returns:

Type	Description
`int`	Sample size (number of observations).

Source code in src/tea_tasting/aggr.py

def count(self) -> int:
    """Return the sample size (number of observations).

    Returns:
        Sample size (number of observations).

    Raises:
        RuntimeError: If `count_` is `None`.
    """
    n_obs = self.count_
    if n_obs is not None:
        return n_obs
    raise RuntimeError("Count is None.")

`cov(left, right)` #

Sample covariance.

Assume the variable is a constant if the variable name is None.

Parameters:

Name	Type	Description	Default
`left`	`str \| None`	First variable name.	required
`right`	`str \| None`	Second variable name.	required

Returns:

Type	Description
`float \| int`	Sample covariance.

Source code in src/tea_tasting/aggr.py

def cov(self, left: str | None, right: str | None) -> float | int:
    """Sample covariance.

    Assume the variable is a constant if the variable name is `None`.

    Args:
        left: First variable name.
        right: Second variable name.

    Returns:
        Sample covariance.
    """
    if left is None or right is None:
        return 0
    return self.cov_[_sorted_tuple(left, right)]

`mean(name)` #

Sample mean.

Assume the variable is a constant 1 if the variable name is None.

Parameters:

Name	Type	Description	Default
`name`	`str \| None`	Variable name.	required

Returns:

Type	Description
`float \| int`	Sample mean.

Source code in src/tea_tasting/aggr.py

def mean(self, name: str | None) -> float | int:
    """Sample mean.

    Assume the variable is a constant `1` if the variable name is `None`.

    Args:
        name: Variable name.

    Returns:
        Sample mean.
    """
    if name is None:
        return 1
    return self.mean_[name]

`ratio_cov(left_numer, left_denom, right_numer, right_denom)` #

Sample covariance of the ratios of variables using the delta method.

Parameters:

Name	Type	Description	Default
`left_numer`	`str \| None`	First numerator variable name.	required
`left_denom`	`str \| None`	First denominator variable name.	required
`right_numer`	`str \| None`	Second numerator variable name.	required
`right_denom`	`str \| None`	Second denominator variable name.	required

Returns:

Type	Description
`float \| int`	Sample covariance of the ratios of variables.

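References:

- [Delta method](https://en.wikipedia.org/wiki/Delta_method).
- [Taylor expansions for the moments of functions of random variables](https://en.wikipedia.org/wiki/Taylor_expansions_for_the_moments_of_functions_of_random_variables).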

Source code in src/tea_tasting/aggr.py

def ratio_cov(
    self,
    left_numer: str | None,
    left_denom: str | None,
    right_numer: str | None,
    right_denom: str | None,
) -> float | int:
    """Sample covariance of the ratios of variables using the delta method.

    Args:
        left_numer: First numerator variable name.
        left_denom: First denominator variable name.
        right_numer: Second numerator variable name.
        right_denom: Second denominator variable name.

    Returns:
        Sample covariance of the ratios of variables.

    References:
        - [Delta method](https://en.wikipedia.org/wiki/Delta_method).
        - [Taylor expansions for the moments of functions of random variables](https://en.wikipedia.org/wiki/Taylor_expansions_for_the_moments_of_functions_of_random_variables).
    """
    left_ratio_of_means = self.mean(left_numer) / self.mean(left_denom)
    right_ratio_of_means = self.mean(right_numer) / self.mean(right_denom)
    return (
        self.cov(left_numer, right_numer)
        - self.cov(left_numer, right_denom) * right_ratio_of_means
        - self.cov(left_denom, right_numer) * left_ratio_of_means
        + self.cov(left_denom, right_denom)
            * left_ratio_of_means * right_ratio_of_means
    ) / self.mean(left_denom) / self.mean(right_denom)

`ratio_var(numer, denom)` #

Sample variance of the ratio of two variables using the delta method.

Parameters:

Name	Type	Description	Default
`numer`	`str \| None`	Numerator variable name.	required
`denom`	`str \| None`	Denominator variable name.	required

Returns:

Type	Description
`float \| int`	Sample variance of the ratio of two variables.

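References:

- [Delta method](https://en.wikipedia.org/wiki/Delta_method).
- [Taylor expansions for the moments of functions of random variables](https://en.wikipedia.org/wiki/Taylor_expansions_for_the_moments_of_functions_of_random_variables).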

Source code in src/tea_tasting/aggr.py

def ratio_var(
    self,
    numer: str | None,
    denom: str | None,
) -> float | int:
    """Sample variance of the ratio of two variables using the delta method.

    Args:
        numer: Numerator variable name.
        denom: Denominator variable name.

    Returns:
        Sample variance of the ratio of two variables.

    References:
        - [Delta method](https://en.wikipedia.org/wiki/Delta_method).
        - [Taylor expansions for the moments of functions of random variables](https://en.wikipedia.org/wiki/Taylor_expansions_for_the_moments_of_functions_of_random_variables).
    """
    numer_mean_sq = self.mean(numer) * self.mean(numer)
    denom_mean_sq = self.mean(denom) * self.mean(denom)
    return (
        self.var(numer)
        - 2 * self.cov(numer, denom) * self.mean(numer) / self.mean(denom)
        + self.var(denom) * numer_mean_sq / denom_mean_sq
    ) / denom_mean_sq

`var(name)` #

Sample variance.

Assume the variable is a constant if the variable name is None.

Parameters:

Name	Type	Description	Default
`name`	`str \| None`	Variable name.	required

Returns:

Type	Description
`float \| int`	Sample variance.

Source code in src/tea_tasting/aggr.py

def var(self, name: str | None) -> float | int:
    """Sample variance.

    Assume the variable is a constant if the variable name is `None`.

    Args:
        name: Variable name.

    Returns:
        Sample variance.
    """
    if name is None:
        return 0
    return self.var_[name]

`with_zero_div()` #

Return aggregates that do not raise an error on division by zero.

Division by zero returns:

- `inf` if numerator is greater than `0`,
- `nan` if numerator is equal to or less than `0`.

Source code in src/tea_tasting/aggr.py

def with_zero_div(self) -> Aggregates:
    """Return aggregates that do not raise an error on division by zero.

    A new `Aggregates` is built in which the count is wrapped with
    `tea_tasting.utils.Int` (when it is not `None`) and every mean, variance,
    and covariance value is passed through `tea_tasting.utils.numeric`.

    Division by zero returns:

    - `inf` if numerator is greater than `0`,
    - `nan` if numerator is equal to or less than `0`.
    """
    wrapped_count = self.count_
    if wrapped_count is not None:
        wrapped_count = tea_tasting.utils.Int(wrapped_count)

    wrapped_means = {
        name: tea_tasting.utils.numeric(value)
        for name, value in self.mean_.items()
    }
    wrapped_vars = {
        name: tea_tasting.utils.numeric(value)
        for name, value in self.var_.items()
    }
    wrapped_covs = {
        pair: tea_tasting.utils.numeric(value)
        for pair, value in self.cov_.items()
    }
    return Aggregates(
        count_=wrapped_count,
        mean_=wrapped_means,
        var_=wrapped_vars,
        cov_=wrapped_covs,
    )

`read_aggregates(data, group_col, *, has_count, mean_cols, var_cols, cov_cols)` #

Extract aggregated statistics.

Parameters:

Name	Type	Description	Default
`data`	`Table \| IntoFrame`	Granular data.	required
`group_col`	`str \| None`	Column name to group by before aggregation. If `None`, total aggregates are calculated.	required
`has_count`	`bool`	If `True`, calculate the sample size.	required
`mean_cols`	`Sequence[str]`	Column names for calculation of sample means.	required
`var_cols`	`Sequence[str]`	Column names for calculation of sample variances.	required
`cov_cols`	`Sequence[tuple[str, str]]`	Pairs of column names for calculation of sample covariances.	required

Returns:

Type	Description
`dict[object, Aggregates] \| Aggregates`	Aggregated statistics.

Source code in src/tea_tasting/aggr.py

def read_aggregates(
    data: ibis.expr.types.Table | narwhals.typing.IntoFrame,
    group_col: str | None,
    *,
    has_count: bool,
    mean_cols: Sequence[str],
    var_cols: Sequence[str],
    cov_cols: Sequence[tuple[str, str]],
) -> dict[object, Aggregates] | Aggregates:
    """Extract aggregated statistics.

    The column lists are first passed through `_validate_aggr_cols`. The data
    is then aggregated by `_read_aggr_ibis` when it is an Ibis table and by
    `_read_aggr_narwhals` otherwise, and each resulting record is converted
    with `_get_aggregates`.

    Args:
        data: Granular data.
        group_col: Column name to group by before aggregation.
            If `None`, total aggregates are calculated.
        has_count: If `True`, calculate the sample size.
        mean_cols: Column names for calculation of sample means.
        var_cols: Column names for calculation of sample variances.
        cov_cols: Pairs of column names for calculation of sample covariances.

    Returns:
        Aggregated statistics: a single `Aggregates` if `group_col` is `None`,
        otherwise a dictionary keyed by the values of `group_col`.
    """
    mean_cols, var_cols, cov_cols = _validate_aggr_cols(mean_cols, var_cols, cov_cols)

    if not isinstance(data, ibis.expr.types.Table):
        records = _read_aggr_narwhals(
            data=data,
            group_col=group_col,
            has_count=has_count,
            mean_cols=mean_cols,
            var_cols=var_cols,
            cov_cols=cov_cols,
        )
    else:
        records = _read_aggr_ibis(
            data=data,
            group_col=group_col,
            has_count=has_count,
            mean_cols=mean_cols,
            var_cols=var_cols,
            cov_cols=cov_cols,
        )

    def _to_aggregates(record):
        # One place for the column arguments shared by both return paths.
        return _get_aggregates(
            record,
            has_count=has_count,
            mean_cols=mean_cols,
            var_cols=var_cols,
            cov_cols=cov_cols,
        )

    if group_col is not None:
        # One entry per record, keyed by that record's group value.
        return {record[group_col]: _to_aggregates(record) for record in records}

    # Without grouping, only the first record is used.
    return _to_aggregates(records[0])

tea_tasting.aggr #

Aggregates(count_=None, mean_={}, var_={}, cov_={}) #

count() #

cov(left, right) #

mean(name) #

ratio_cov(left_numer, left_denom, right_numer, right_denom) #

ratio_var(numer, denom) #

var(name) #

with_zero_div() #

read_aggregates(data, group_col, *, has_count, mean_cols, var_cols, cov_cols) #

`tea_tasting.aggr` #

`Aggregates(count_=None, mean_={}, var_={}, cov_={})` #

`count()` #

`cov(left, right)` #

`mean(name)` #

`ratio_cov(left_numer, left_denom, right_numer, right_denom)` #

`ratio_var(numer, denom)` #

`var(name)` #

`with_zero_div()` #

`read_aggregates(data, group_col, *, has_count, mean_cols, var_cols, cov_cols)` #