
tea_tasting.metrics.mean

Metrics for the analysis of means.

Mean(value, covariate=None, *, alternative=None, confidence_level=None, equal_var=None, use_t=None, alpha=None, ratio=None, power=None, effect_size=None, rel_effect_size=None, n_obs=None)

Bases: RatioOfMeans

Metric for the analysis of means.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `value` | `str` | Metric value column name. | *required* |
| `covariate` | `str \| None` | Metric covariate column name. | `None` |
| `alternative` | `Literal['two-sided', 'greater', 'less'] \| None` | Alternative hypothesis. | `None` |
| `confidence_level` | `float \| None` | Confidence level for the confidence interval. | `None` |
| `equal_var` | `bool \| None` | Defines whether equal variance is assumed. If `True`, pooled variance is used for the calculation of the standard error of the difference between two means. | `None` |
| `use_t` | `bool \| None` | Defines whether to use Student's t-distribution (`True`) or the Normal distribution (`False`). | `None` |
| `alpha` | `float \| None` | Significance level. Only for the analysis of power. | `None` |
| `ratio` | `float \| int \| None` | Ratio of the number of observations in the treatment relative to the control. Only for the analysis of power. | `None` |
| `power` | `float \| None` | Statistical power. Only for the analysis of power. | `None` |
| `effect_size` | `float \| int \| Sequence[float \| int] \| None` | Absolute effect size. Difference between the two means. Only for the analysis of power. | `None` |
| `rel_effect_size` | `float \| Sequence[float] \| None` | Relative effect size. Difference between the two means, divided by the control mean. Only for the analysis of power. | `None` |
| `n_obs` | `int \| Sequence[int] \| None` | Number of observations in the control and in the treatment together. Only for the analysis of power. | `None` |
Alternative hypothesis options:

- `"two-sided"`: the means are unequal,
- `"greater"`: the mean in the treatment variant is greater than the mean in the control variant,
- `"less"`: the mean in the treatment variant is less than the mean in the control variant.
Parameter defaults:

Defaults for the parameters `alpha`, `alternative`, `confidence_level`, `equal_var`, `n_obs`, `power`, `ratio`, and `use_t` can be changed using the `config_context` and `set_context` functions. See the [Global configuration](https://tea-tasting.e10v.me/api/config/) reference for details.
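For example, a minimal sketch of overriding two defaults with `config_context` (assuming, per the Global configuration reference, that defaults are passed as keyword arguments):

```python
import tea_tasting as tt

# Metrics created inside the context pick up the overridden defaults.
with tt.config_context(equal_var=True, use_t=False):
    orders_per_user = tt.Mean("orders")
```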

References:

- [Deng, A., Knoblich, U., & Lu, J. (2018). Applying the Delta Method in Metric Analytics: A Practical Guide with Novel Ideas](https://alexdeng.github.io/public/files/kdd2018-dm.pdf).
- [Deng, A., Xu, Y., Kohavi, R., & Walker, T. (2013). Improving the Sensitivity of Online Controlled Experiments by Utilizing Pre-Experiment Data](https://exp-platform.com/Documents/2013-02-CUPED-ImprovingSensitivityOfControlledExperiments.pdf).

Examples:

import tea_tasting as tt


experiment = tt.Experiment(
    orders_per_user=tt.Mean("orders"),
    revenue_per_user=tt.Mean("revenue"),
)

data = tt.make_users_data(seed=42)
result = experiment.analyze(data)
print(result)
#>           metric control treatment rel_effect_size rel_effect_size_ci pvalue
#>  orders_per_user   0.530     0.573            8.0%       [-2.0%, 19%]  0.118
#> revenue_per_user    5.24      5.73            9.3%       [-2.4%, 22%]  0.123

With CUPED:

experiment = tt.Experiment(
    orders_per_user=tt.Mean("orders", "orders_covariate"),
    revenue_per_user=tt.Mean("revenue", "revenue_covariate"),
)

data = tt.make_users_data(seed=42, covariates=True)
result = experiment.analyze(data)
print(result)
#>           metric control treatment rel_effect_size rel_effect_size_ci  pvalue
#>  orders_per_user   0.523     0.581             11%        [2.9%, 20%] 0.00733
#> revenue_per_user    5.12      5.85             14%        [3.8%, 26%] 0.00675

Power analysis:

data = tt.make_users_data(
    seed=42,
    sessions_uplift=0,
    orders_uplift=0,
    revenue_uplift=0,
    covariates=True,
)

orders_per_user = tt.Mean(
    "orders",
    "orders_covariate",
    n_obs=(10_000, 20_000),
)
print(orders_per_user.solve_power(data))  # Solve for effect size.
#> power effect_size rel_effect_size n_obs
#>   80%      0.0374            7.2% 10000
#>   80%      0.0264            5.1% 20000

orders_per_user = tt.Mean(
    "orders",
    "orders_covariate",
    rel_effect_size=0.05,
)
# Solve for the total number of observations.
print(orders_per_user.solve_power(data, "n_obs"))
#> power effect_size rel_effect_size n_obs
#>   80%      0.0260            5.0% 20733

orders_per_user = tt.Mean(
    "orders",
    "orders_covariate",
    rel_effect_size=0.1,
)
# Solve for power. Infer number of observations from the sample.
print(orders_per_user.solve_power(data, "power"))
#> power effect_size rel_effect_size n_obs
#>   69%      0.0519             10%  4000
Source code in src/tea_tasting/metrics/mean.py
def __init__(  # noqa: PLR0913
    self,
    value: str,
    covariate: str | None = None,
    *,
    alternative: Literal["two-sided", "greater", "less"] | None = None,
    confidence_level: float | None = None,
    equal_var: bool | None = None,
    use_t: bool | None = None,
    alpha: float | None = None,
    ratio: float | int | None = None,
    power: float | None = None,
    effect_size: float | int | Sequence[float | int] | None = None,
    rel_effect_size: float | Sequence[float] | None = None,
    n_obs: int | Sequence[int] | None = None,
) -> None:
    """Metric for the analysis of means.

    Args:
        value: Metric value column name.
        covariate: Metric covariate column name.
        alternative: Alternative hypothesis.
        confidence_level: Confidence level for the confidence interval.
        equal_var: Defines whether equal variance is assumed. If `True`,
            pooled variance is used for the calculation of the standard error
            of the difference between two means.
        use_t: Defines whether to use the Student's t-distribution (`True`) or
            the Normal distribution (`False`).
        alpha: Significance level. Only for the analysis of power.
        ratio: Ratio of the number of observations in the treatment
            relative to the control. Only for the analysis of power.
        power: Statistical power. Only for the analysis of power.
        effect_size: Absolute effect size. Difference between the two means.
            Only for the analysis of power.
        rel_effect_size: Relative effect size. Difference between the two means,
            divided by the control mean. Only for the analysis of power.
        n_obs: Number of observations in the control and in the treatment together.
            Only for the analysis of power.

    Alternative hypothesis options:
        - `"two-sided"`: the means are unequal,
        - `"greater"`: the mean in the treatment variant is greater than the mean
            in the control variant,
        - `"less"`: the mean in the treatment variant is less than the mean
            in the control variant.

    Parameter defaults:
        Defaults for parameters `alpha`, `alternative`, `confidence_level`,
        `equal_var`, `n_obs`, `power`, `ratio`, and `use_t` can be changed
        using the `config_context` and `set_context` functions.
        See the [Global configuration](https://tea-tasting.e10v.me/api/config/)
        reference for details.

    References:
        - [Deng, A., Knoblich, U., & Lu, J. (2018). Applying the Delta Method in Metric Analytics: A Practical Guide with Novel Ideas](https://alexdeng.github.io/public/files/kdd2018-dm.pdf).
        - [Deng, A., Xu, Y., Kohavi, R., & Walker, T. (2013). Improving the Sensitivity of Online Controlled Experiments by Utilizing Pre-Experiment Data](https://exp-platform.com/Documents/2013-02-CUPED-ImprovingSensitivityOfControlledExperiments.pdf).

    Examples:
        ```python
        import tea_tasting as tt


        experiment = tt.Experiment(
            orders_per_user=tt.Mean("orders"),
            revenue_per_user=tt.Mean("revenue"),
        )

        data = tt.make_users_data(seed=42)
        result = experiment.analyze(data)
        print(result)
        #>           metric control treatment rel_effect_size rel_effect_size_ci pvalue
        #>  orders_per_user   0.530     0.573            8.0%       [-2.0%, 19%]  0.118
        #> revenue_per_user    5.24      5.73            9.3%       [-2.4%, 22%]  0.123
        ```

        With CUPED:

        ```python
        experiment = tt.Experiment(
            orders_per_user=tt.Mean("orders", "orders_covariate"),
            revenue_per_user=tt.Mean("revenue", "revenue_covariate"),
        )

        data = tt.make_users_data(seed=42, covariates=True)
        result = experiment.analyze(data)
        print(result)
        #>           metric control treatment rel_effect_size rel_effect_size_ci  pvalue
        #>  orders_per_user   0.523     0.581             11%        [2.9%, 20%] 0.00733
        #> revenue_per_user    5.12      5.85             14%        [3.8%, 26%] 0.00675
        ```

        Power analysis:

        ```python
        data = tt.make_users_data(
            seed=42,
            sessions_uplift=0,
            orders_uplift=0,
            revenue_uplift=0,
            covariates=True,
        )

        orders_per_user = tt.Mean(
            "orders",
            "orders_covariate",
            n_obs=(10_000, 20_000),
        )
        print(orders_per_user.solve_power(data))  # Solve for effect size.
        #> power effect_size rel_effect_size n_obs
        #>   80%      0.0374            7.2% 10000
        #>   80%      0.0264            5.1% 20000

        orders_per_user = tt.Mean(
            "orders",
            "orders_covariate",
            rel_effect_size=0.05,
        )
        # Solve for the total number of observations.
        print(orders_per_user.solve_power(data, "n_obs"))
        #> power effect_size rel_effect_size n_obs
        #>   80%      0.0260            5.0% 20733

        orders_per_user = tt.Mean(
            "orders",
            "orders_covariate",
            rel_effect_size=0.1,
        )
        # Solve for power. Infer number of observations from the sample.
        print(orders_per_user.solve_power(data, "power"))
        #> power effect_size rel_effect_size n_obs
        #>   69%      0.0519             10%  4000
        ```
    """  # noqa: E501
    super().__init__(
        numer=value,
        denom=None,
        numer_covariate=covariate,
        denom_covariate=None,
        alternative=alternative,
        confidence_level=confidence_level,
        equal_var=equal_var,
        use_t=use_t,
        alpha=alpha,
        ratio=ratio,
        power=power,
        effect_size=effect_size,
        rel_effect_size=rel_effect_size,
        n_obs=n_obs,
    )
    self.value = value
    self.covariate = covariate

aggr_cols: AggrCols property

Columns to be aggregated for a metric analysis.
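A minimal sketch of inspecting the columns a metric requires (only the printed value is shown; the exact contents of `AggrCols` are not assumed here):

```python
import tea_tasting as tt

# Print the aggregation columns required by a metric with a covariate.
print(tt.Mean("orders", "orders_covariate").aggr_cols)
```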

analyze(data, control, treatment, variant=None)

Analyze a metric in an experiment.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `DataFrame \| Table \| dict[Any, Aggregates]` | Experimental data. | *required* |
| `control` | `Any` | Control variant. | *required* |
| `treatment` | `Any` | Treatment variant. | *required* |
| `variant` | `str \| None` | Variant column name. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `R` | Analysis result. |
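A metric can also be analyzed directly, without an `Experiment`. A minimal sketch, assuming the `make_users_data` sample with variants `0` and `1` in the `"variant"` column:

```python
import tea_tasting as tt

data = tt.make_users_data(seed=42)
# Analyze a single metric directly; the variant column name is required here.
result = tt.Mean("orders").analyze(data, control=0, treatment=1, variant="variant")
print(result)
```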

Source code in src/tea_tasting/metrics/base.py
def analyze(
    self,
    data: pd.DataFrame | ibis.expr.types.Table | dict[
        Any, tea_tasting.aggr.Aggregates],
    control: Any,
    treatment: Any,
    variant: str | None = None,
) -> R:
    """Analyze a metric in an experiment.

    Args:
        data: Experimental data.
        control: Control variant.
        treatment: Treatment variant.
        variant: Variant column name.

    Returns:
        Analysis result.
    """
    aggr = aggregate_by_variants(
        data,
        aggr_cols=self.aggr_cols,
        variant=variant,
    )
    return self.analyze_aggregates(
        control=aggr[control],
        treatment=aggr[treatment],
    )

analyze_aggregates(control, treatment)

Analyze a metric in an experiment using aggregated statistics.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `control` | `Aggregates` | Control data. | *required* |
| `treatment` | `Aggregates` | Treatment data. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `MeanResult` | Analysis result. |

Source code in src/tea_tasting/metrics/mean.py
def analyze_aggregates(
    self,
    control: tea_tasting.aggr.Aggregates,
    treatment: tea_tasting.aggr.Aggregates,
) -> MeanResult:
    """Analyze a metric in an experiment using aggregated statistics.

    Args:
        control: Control data.
        treatment: Treatment data.

    Returns:
        Analysis result.
    """
    control = control.with_zero_div()
    treatment = treatment.with_zero_div()
    total = control + treatment
    covariate_coef = self._covariate_coef(total)
    covariate_mean = total.mean(self.numer_covariate) / total.mean(
        self.denom_covariate)
    return self._analyze_stats(
        contr_mean=self._metric_mean(control, covariate_coef, covariate_mean),
        contr_var=self._metric_var(control, covariate_coef),
        contr_count=control.count(),
        treat_mean=self._metric_mean(treatment, covariate_coef, covariate_mean),
        treat_var=self._metric_var(treatment, covariate_coef),
        treat_count=treatment.count(),
    )

solve_power(data, parameter='rel_effect_size')

Solve for a parameter of the power of a test.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `DataFrame \| Table \| Aggregates` | Sample data. | *required* |
| `parameter` | `Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']` | Parameter name. | `'rel_effect_size'` |

Returns:

| Type | Description |
| --- | --- |
| `S` | Power analysis result. |
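A minimal sketch of solving for sample size for a metric without a covariate (output omitted, as the exact numbers depend on the sample):

```python
import tea_tasting as tt

data = tt.make_users_data(seed=42)
metric = tt.Mean("orders", rel_effect_size=0.05)
# Solve for the total number of observations needed to detect
# a 5% relative uplift at the default power and significance level.
print(metric.solve_power(data, "n_obs"))
```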

Source code in src/tea_tasting/metrics/base.py
def solve_power(
    self,
    data: pd.DataFrame | ibis.expr.types.Table | tea_tasting.aggr.Aggregates,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> S:
    """Solve for a parameter of the power of a test.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    if not isinstance(data, tea_tasting.aggr.Aggregates):
        data = tea_tasting.aggr.read_aggregates(
            data=data,
            group_col=None,
            **self.aggr_cols._asdict(),
        )
    return self.solve_power_from_aggregates(data=data, parameter=parameter)

solve_power_from_aggregates(data, parameter='rel_effect_size')

Solve for a parameter of the power of a test.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `Aggregates` | Sample data. | *required* |
| `parameter` | `Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']` | Parameter name. | `'rel_effect_size'` |

Returns:

| Type | Description |
| --- | --- |
| `MeanPowerResults` | Power analysis result. |

Source code in src/tea_tasting/metrics/mean.py
def solve_power_from_aggregates(
    self,
    data: tea_tasting.aggr.Aggregates,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> MeanPowerResults:
    """Solve for a parameter of the power of a test.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    tea_tasting.utils.check_scalar(
        parameter,
        "parameter",
        in_={"power", "effect_size", "rel_effect_size", "n_obs"},
    )

    data = data.with_zero_div()
    covariate_coef = self._covariate_coef(data)
    covariate_mean = data.mean(self.numer_covariate) / data.mean(
        self.denom_covariate)
    metric_mean = self._metric_mean(data, covariate_coef, covariate_mean)

    power, effect_size, rel_effect_size, n_obs = self._validate_power_parameters(
        metric_mean=metric_mean,
        sample_count=data.count(),
        parameter=parameter,
    )

    result = MeanPowerResults()
    for effect_size_i, rel_effect_size_i in zip(
        effect_size,
        rel_effect_size,
        strict=True,
    ):
        for n_obs_i in n_obs:
            parameter_value = self._solve_power_from_stats(
                sample_var=self._metric_var(data, covariate_coef),
                sample_count=n_obs_i,
                effect_size=effect_size_i,
                power=power,
            )
            result.append(MeanPowerResult(
                power=parameter_value if parameter == "power" else power,  # type: ignore
                effect_size=(
                    parameter_value
                    if parameter in {"effect_size", "rel_effect_size"}
                    else effect_size_i
                ),  # type: ignore
                rel_effect_size=(
                    parameter_value / metric_mean
                    if parameter in {"effect_size", "rel_effect_size"}
                    else rel_effect_size_i
                ),  # type: ignore
                n_obs=(
                    math.ceil(parameter_value)
                    if parameter == "n_obs"
                    else n_obs_i
                ),  # type: ignore
            ))

    return result

MeanPowerResult

Bases: NamedTuple

Power analysis results.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `power` | `float` | Statistical power. |
| `effect_size` | `float` | Absolute effect size. Difference between the two means. |
| `rel_effect_size` | `float` | Relative effect size. Difference between the two means, divided by the control mean. |
| `n_obs` | `float` | Number of observations in the control and in the treatment together. |

MeanResult

Bases: NamedTuple

Result of the analysis of means.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `control` | `float` | Control mean. |
| `treatment` | `float` | Treatment mean. |
| `effect_size` | `float` | Absolute effect size. Difference between the two means. |
| `effect_size_ci_lower` | `float` | Lower bound of the absolute effect size confidence interval. |
| `effect_size_ci_upper` | `float` | Upper bound of the absolute effect size confidence interval. |
| `rel_effect_size` | `float` | Relative effect size. Difference between the two means, divided by the control mean. |
| `rel_effect_size_ci_lower` | `float` | Lower bound of the relative effect size confidence interval. |
| `rel_effect_size_ci_upper` | `float` | Upper bound of the relative effect size confidence interval. |
| `pvalue` | `float` | P-value. |
| `statistic` | `float` | Statistic (standardized effect size). |
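Since `MeanResult` is a `NamedTuple`, its fields are plain attributes. A minimal sketch, reusing the direct `analyze` call shown above:

```python
import tea_tasting as tt

data = tt.make_users_data(seed=42)
result = tt.Mean("orders").analyze(data, control=0, treatment=1, variant="variant")
# Access individual fields, or convert the whole result to a dict.
print(result.pvalue)
print(result._asdict())
```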

RatioOfMeans(numer, denom=None, numer_covariate=None, denom_covariate=None, *, alternative=None, confidence_level=None, equal_var=None, use_t=None, alpha=None, ratio=None, power=None, effect_size=None, rel_effect_size=None, n_obs=None)

Bases: MetricBaseAggregated[MeanResult], PowerBaseAggregated[MeanPowerResults]

Metric for the analysis of ratios of means.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `numer` | `str` | Numerator column name. | *required* |
| `denom` | `str \| None` | Denominator column name. | `None` |
| `numer_covariate` | `str \| None` | Covariate numerator column name. | `None` |
| `denom_covariate` | `str \| None` | Covariate denominator column name. | `None` |
| `alternative` | `Literal['two-sided', 'greater', 'less'] \| None` | Alternative hypothesis. | `None` |
| `confidence_level` | `float \| None` | Confidence level for the confidence interval. | `None` |
| `equal_var` | `bool \| None` | Defines whether equal variance is assumed. If `True`, pooled variance is used for the calculation of the standard error of the difference between two means. | `None` |
| `use_t` | `bool \| None` | Defines whether to use Student's t-distribution (`True`) or the Normal distribution (`False`). | `None` |
| `alpha` | `float \| None` | Significance level. Only for the analysis of power. | `None` |
| `ratio` | `float \| int \| None` | Ratio of the number of observations in the treatment relative to the control. Only for the analysis of power. | `None` |
| `power` | `float \| None` | Statistical power. Only for the analysis of power. | `None` |
| `effect_size` | `float \| int \| Sequence[float \| int] \| None` | Absolute effect size. Difference between the two means. Only for the analysis of power. | `None` |
| `rel_effect_size` | `float \| Sequence[float] \| None` | Relative effect size. Difference between the two means, divided by the control mean. Only for the analysis of power. | `None` |
| `n_obs` | `int \| Sequence[int] \| None` | Number of observations in the control and in the treatment together. Only for the analysis of power. | `None` |
Alternative hypothesis options:

- `"two-sided"`: the means are unequal,
- `"greater"`: the mean in the treatment variant is greater than the mean in the control variant,
- `"less"`: the mean in the treatment variant is less than the mean in the control variant.
Parameter defaults:

Defaults for the parameters `alpha`, `alternative`, `confidence_level`, `equal_var`, `n_obs`, `power`, `ratio`, and `use_t` can be changed using the `config_context` and `set_context` functions. See the [Global configuration](https://tea-tasting.e10v.me/api/config/) reference for details.

References:

- [Deng, A., Knoblich, U., & Lu, J. (2018). Applying the Delta Method in Metric Analytics: A Practical Guide with Novel Ideas](https://alexdeng.github.io/public/files/kdd2018-dm.pdf).
- [Deng, A., Xu, Y., Kohavi, R., & Walker, T. (2013). Improving the Sensitivity of Online Controlled Experiments by Utilizing Pre-Experiment Data](https://exp-platform.com/Documents/2013-02-CUPED-ImprovingSensitivityOfControlledExperiments.pdf).

Examples:

import tea_tasting as tt


experiment = tt.Experiment(
    orders_per_session=tt.RatioOfMeans("orders", "sessions"),
)

data = tt.make_users_data(seed=42)
result = experiment.analyze(data)
print(result)
#>             metric control treatment rel_effect_size rel_effect_size_ci pvalue
#> orders_per_session   0.266     0.289            8.8%      [-0.89%, 19%] 0.0762

With CUPED:

experiment = tt.Experiment(
    orders_per_session=tt.RatioOfMeans(
        "orders",
        "sessions",
        "orders_covariate",
        "sessions_covariate",
    ),
)

data = tt.make_users_data(seed=42, covariates=True)
result = experiment.analyze(data)
print(result)
#>             metric control treatment rel_effect_size rel_effect_size_ci  pvalue
#> orders_per_session   0.262     0.293             12%        [4.2%, 21%] 0.00229

Power analysis:

data = tt.make_users_data(
    seed=42,
    sessions_uplift=0,
    orders_uplift=0,
    revenue_uplift=0,
    covariates=True,
)

orders_per_session = tt.RatioOfMeans(
    "orders",
    "sessions",
    "orders_covariate",
    "sessions_covariate",
    n_obs=(10_000, 20_000),
)
print(orders_per_session.solve_power(data))  # Solve for effect size.
#> power effect_size rel_effect_size n_obs
#>   80%      0.0177            6.8% 10000
#>   80%      0.0125            4.8% 20000

orders_per_session = tt.RatioOfMeans(
    "orders",
    "sessions",
    "orders_covariate",
    "sessions_covariate",
    rel_effect_size=0.05,
)
# Solve for the total number of observations.
print(orders_per_session.solve_power(data, "n_obs"))
#> power effect_size rel_effect_size n_obs
#>   80%      0.0130            5.0% 18515

orders_per_session = tt.RatioOfMeans(
    "orders",
    "sessions",
    "orders_covariate",
    "sessions_covariate",
    rel_effect_size=0.1,
)
# Solve for power. Infer number of observations from the sample.
print(orders_per_session.solve_power(data, "power"))
#> power effect_size rel_effect_size n_obs
#>   74%      0.0261             10%  4000
Source code in src/tea_tasting/metrics/mean.py
def __init__(  # noqa: PLR0913
    self,
    numer: str,
    denom: str | None = None,
    numer_covariate: str | None = None,
    denom_covariate: str | None = None,
    *,
    alternative: Literal["two-sided", "greater", "less"] | None = None,
    confidence_level: float | None = None,
    equal_var: bool | None = None,
    use_t: bool | None = None,
    alpha: float | None = None,
    ratio: float | int | None = None,
    power: float | None = None,
    effect_size: float | int | Sequence[float | int] | None = None,
    rel_effect_size: float | Sequence[float] | None = None,
    n_obs: int | Sequence[int] | None = None,
) -> None:
    """Metric for the analysis of ratios of means.

    Args:
        numer: Numerator column name.
        denom: Denominator column name.
        numer_covariate: Covariate numerator column name.
        denom_covariate: Covariate denominator column name.
        alternative: Alternative hypothesis.
        confidence_level: Confidence level for the confidence interval.
        equal_var: Defines whether equal variance is assumed. If `True`,
            pooled variance is used for the calculation of the standard error
            of the difference between two means.
        use_t: Defines whether to use the Student's t-distribution (`True`) or
            the Normal distribution (`False`).
        alpha: Significance level. Only for the analysis of power.
        ratio: Ratio of the number of observations in the treatment
            relative to the control. Only for the analysis of power.
        power: Statistical power. Only for the analysis of power.
        effect_size: Absolute effect size. Difference between the two means.
            Only for the analysis of power.
        rel_effect_size: Relative effect size. Difference between the two means,
            divided by the control mean. Only for the analysis of power.
        n_obs: Number of observations in the control and in the treatment together.
            Only for the analysis of power.

    Alternative hypothesis options:
        - `"two-sided"`: the means are unequal,
        - `"greater"`: the mean in the treatment variant is greater than the mean
            in the control variant,
        - `"less"`: the mean in the treatment variant is less than the mean
            in the control variant.

    Parameter defaults:
        Defaults for parameters `alpha`, `alternative`, `confidence_level`,
        `equal_var`, `n_obs`, `power`, `ratio`, and `use_t` can be changed
        using the `config_context` and `set_context` functions.
        See the [Global configuration](https://tea-tasting.e10v.me/api/config/)
        reference for details.

    References:
        - [Deng, A., Knoblich, U., & Lu, J. (2018). Applying the Delta Method in Metric Analytics: A Practical Guide with Novel Ideas](https://alexdeng.github.io/public/files/kdd2018-dm.pdf).
        - [Deng, A., Xu, Y., Kohavi, R., & Walker, T. (2013). Improving the Sensitivity of Online Controlled Experiments by Utilizing Pre-Experiment Data](https://exp-platform.com/Documents/2013-02-CUPED-ImprovingSensitivityOfControlledExperiments.pdf).

    Examples:
        ```python
        import tea_tasting as tt


        experiment = tt.Experiment(
            orders_per_session=tt.RatioOfMeans("orders", "sessions"),
        )

        data = tt.make_users_data(seed=42)
        result = experiment.analyze(data)
        print(result)
        #>             metric control treatment rel_effect_size rel_effect_size_ci pvalue
        #> orders_per_session   0.266     0.289            8.8%      [-0.89%, 19%] 0.0762
        ```

        With CUPED:

        ```python
        experiment = tt.Experiment(
            orders_per_session=tt.RatioOfMeans(
                "orders",
                "sessions",
                "orders_covariate",
                "sessions_covariate",
            ),
        )

        data = tt.make_users_data(seed=42, covariates=True)
        result = experiment.analyze(data)
        print(result)
        #>             metric control treatment rel_effect_size rel_effect_size_ci  pvalue
        #> orders_per_session   0.262     0.293             12%        [4.2%, 21%] 0.00229
        ```

        Power analysis:

        ```python
        data = tt.make_users_data(
            seed=42,
            sessions_uplift=0,
            orders_uplift=0,
            revenue_uplift=0,
            covariates=True,
        )

        orders_per_session = tt.RatioOfMeans(
            "orders",
            "sessions",
            "orders_covariate",
            "sessions_covariate",
            n_obs=(10_000, 20_000),
        )
        print(orders_per_session.solve_power(data))  # Solve for effect size.
        #> power effect_size rel_effect_size n_obs
        #>   80%      0.0177            6.8% 10000
        #>   80%      0.0125            4.8% 20000

        orders_per_session = tt.RatioOfMeans(
            "orders",
            "sessions",
            "orders_covariate",
            "sessions_covariate",
            rel_effect_size=0.05,
        )
        # Solve for the total number of observations.
        print(orders_per_session.solve_power(data, "n_obs"))
        #> power effect_size rel_effect_size n_obs
        #>   80%      0.0130            5.0% 18515

        orders_per_session = tt.RatioOfMeans(
            "orders",
            "sessions",
            "orders_covariate",
            "sessions_covariate",
            rel_effect_size=0.1,
        )
        # Solve for power. Infer number of observations from the sample.
        print(orders_per_session.solve_power(data, "power"))
        #> power effect_size rel_effect_size n_obs
        #>   74%      0.0261             10%  4000
        ```
    """  # noqa: E501
    self.numer = tea_tasting.utils.check_scalar(numer, "numer", typ=str)
    self.denom = tea_tasting.utils.check_scalar(denom, "denom", typ=str | None)
    self.numer_covariate = tea_tasting.utils.check_scalar(
        numer_covariate, "numer_covariate", typ=str | None)
    self.denom_covariate = tea_tasting.utils.check_scalar(
        denom_covariate, "denom_covariate", typ=str | None)
    self.alternative = (
        tea_tasting.utils.auto_check(alternative, "alternative")
        if alternative is not None
        else tea_tasting.config.get_config("alternative")
    )
    self.confidence_level = (
        tea_tasting.utils.auto_check(confidence_level, "confidence_level")
        if confidence_level is not None
        else tea_tasting.config.get_config("confidence_level")
    )
    self.equal_var = (
        tea_tasting.utils.auto_check(equal_var, "equal_var")
        if equal_var is not None
        else tea_tasting.config.get_config("equal_var")
    )
    self.use_t = (
        tea_tasting.utils.auto_check(use_t, "use_t")
        if use_t is not None
        else tea_tasting.config.get_config("use_t")
    )
    self.alpha = (
        tea_tasting.utils.auto_check(alpha, "alpha")
        if alpha is not None
        else tea_tasting.config.get_config("alpha")
    )
    self.ratio = (
        tea_tasting.utils.auto_check(ratio, "ratio")
        if ratio is not None
        else tea_tasting.config.get_config("ratio")
    )
    self.power = (
        tea_tasting.utils.auto_check(power, "power")
        if power is not None
        else tea_tasting.config.get_config("power")
    )
    if effect_size is not None and rel_effect_size is not None:
        raise ValueError(
            "Both `effect_size` and `rel_effect_size` are not `None`. "
            "Only one of them should be defined.",
        )
    if isinstance(effect_size, Sequence):
        for x in effect_size:
            tea_tasting.utils.check_scalar(
                x, "effect_size", typ=float | int,
                gt=float("-inf"), lt=float("inf"), ne=0,
            )
    elif effect_size is not None:
        tea_tasting.utils.check_scalar(
            effect_size, "effect_size", typ=float | int,
            gt=float("-inf"), lt=float("inf"), ne=0,
        )
    self.effect_size = effect_size
    if isinstance(rel_effect_size, Sequence):
        for x in rel_effect_size:
            tea_tasting.utils.check_scalar(
                x, "rel_effect_size", typ=float | int,
                gt=float("-inf"), lt=float("inf"), ne=0,
            )
    elif rel_effect_size is not None:
        tea_tasting.utils.check_scalar(
            rel_effect_size, "rel_effect_size", typ=float | int,
            gt=float("-inf"), lt=float("inf"), ne=0,
        )
    self.rel_effect_size = rel_effect_size
    self.n_obs = (
        tea_tasting.utils.auto_check(n_obs, "n_obs")
        if n_obs is not None
        else tea_tasting.config.get_config("n_obs")
    )

aggr_cols: AggrCols property

Columns to be aggregated for a metric analysis.

analyze(data, control, treatment, variant=None)

Analyze a metric in an experiment.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `DataFrame \| Table \| dict[Any, Aggregates]` | Experimental data. | *required* |
| `control` | `Any` | Control variant. | *required* |
| `treatment` | `Any` | Treatment variant. | *required* |
| `variant` | `str \| None` | Variant column name. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `R` | Analysis result. |

Source code in src/tea_tasting/metrics/base.py
def analyze(
    self,
    data: pd.DataFrame | ibis.expr.types.Table | dict[
        Any, tea_tasting.aggr.Aggregates],
    control: Any,
    treatment: Any,
    variant: str | None = None,
) -> R:
    """Analyze a metric in an experiment.

    Args:
        data: Experimental data.
        control: Control variant.
        treatment: Treatment variant.
        variant: Variant column name.

    Returns:
        Analysis result.
    """
    aggr = aggregate_by_variants(
        data,
        aggr_cols=self.aggr_cols,
        variant=variant,
    )
    return self.analyze_aggregates(
        control=aggr[control],
        treatment=aggr[treatment],
    )

analyze_aggregates(control, treatment)

Analyze a metric in an experiment using aggregated statistics.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `control` | `Aggregates` | Control data. | *required* |
| `treatment` | `Aggregates` | Treatment data. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `MeanResult` | Analysis result. |

Source code in src/tea_tasting/metrics/mean.py
def analyze_aggregates(
    self,
    control: tea_tasting.aggr.Aggregates,
    treatment: tea_tasting.aggr.Aggregates,
) -> MeanResult:
    """Analyze a metric in an experiment using aggregated statistics.

    Args:
        control: Control data.
        treatment: Treatment data.

    Returns:
        Analysis result.
    """
    control = control.with_zero_div()
    treatment = treatment.with_zero_div()
    total = control + treatment
    covariate_coef = self._covariate_coef(total)
    covariate_mean = total.mean(self.numer_covariate) / total.mean(
        self.denom_covariate)
    return self._analyze_stats(
        contr_mean=self._metric_mean(control, covariate_coef, covariate_mean),
        contr_var=self._metric_var(control, covariate_coef),
        contr_count=control.count(),
        treat_mean=self._metric_mean(treatment, covariate_coef, covariate_mean),
        treat_var=self._metric_var(treatment, covariate_coef),
        treat_count=treatment.count(),
    )

solve_power(data, parameter='rel_effect_size')

Solve for a parameter of the power of a test.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `DataFrame \| Table \| Aggregates` | Sample data. | *required* |
| `parameter` | `Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']` | Parameter name. | `'rel_effect_size'` |

Returns:

| Type | Description |
| --- | --- |
| `S` | Power analysis result. |

Source code in src/tea_tasting/metrics/base.py
def solve_power(
    self,
    data: pd.DataFrame | ibis.expr.types.Table | tea_tasting.aggr.Aggregates,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> S:
    """Solve for a parameter of the power of a test.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    if not isinstance(data, tea_tasting.aggr.Aggregates):
        data = tea_tasting.aggr.read_aggregates(
            data=data,
            group_col=None,
            **self.aggr_cols._asdict(),
        )
    return self.solve_power_from_aggregates(data=data, parameter=parameter)

solve_power_from_aggregates(data, parameter='rel_effect_size')

Solve for a parameter of the power of a test.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `Aggregates` | Sample data. | *required* |
| `parameter` | `Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']` | Parameter name. | `'rel_effect_size'` |

Returns:

| Type | Description |
| --- | --- |
| `MeanPowerResults` | Power analysis result. |

Source code in src/tea_tasting/metrics/mean.py
def solve_power_from_aggregates(
    self,
    data: tea_tasting.aggr.Aggregates,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> MeanPowerResults:
    """Solve for a parameter of the power of a test.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    tea_tasting.utils.check_scalar(
        parameter,
        "parameter",
        in_={"power", "effect_size", "rel_effect_size", "n_obs"},
    )

    data = data.with_zero_div()
    covariate_coef = self._covariate_coef(data)
    covariate_mean = data.mean(self.numer_covariate) / data.mean(
        self.denom_covariate)
    metric_mean = self._metric_mean(data, covariate_coef, covariate_mean)

    power, effect_size, rel_effect_size, n_obs = self._validate_power_parameters(
        metric_mean=metric_mean,
        sample_count=data.count(),
        parameter=parameter,
    )

    result = MeanPowerResults()
    for effect_size_i, rel_effect_size_i in zip(
        effect_size,
        rel_effect_size,
        strict=True,
    ):
        for n_obs_i in n_obs:
            parameter_value = self._solve_power_from_stats(
                sample_var=self._metric_var(data, covariate_coef),
                sample_count=n_obs_i,
                effect_size=effect_size_i,
                power=power,
            )
            result.append(MeanPowerResult(
                power=parameter_value if parameter == "power" else power,  # type: ignore
                effect_size=(
                    parameter_value
                    if parameter in {"effect_size", "rel_effect_size"}
                    else effect_size_i
                ),  # type: ignore
                rel_effect_size=(
                    parameter_value / metric_mean
                    if parameter in {"effect_size", "rel_effect_size"}
                    else rel_effect_size_i
                ),  # type: ignore
                n_obs=(
                    math.ceil(parameter_value)
                    if parameter == "n_obs"
                    else n_obs_i
                ),  # type: ignore
            ))

    return result