Skip to content

tea_tasting.metrics.mean #

Metrics for the analysis of means.

Mean(value, covariate=None, *, alternative=None, confidence_level=None, equal_var=None, use_t=None, alpha=None, ratio=None, power=None, effect_size=None, rel_effect_size=None, n_obs=None) #

Bases: RatioOfMeans

Metric for the analysis of means.

Parameters:

Name Type Description Default
value str

Metric value column name.

required
covariate str | None

Metric covariate column name.

None
alternative Literal['two-sided', 'greater', 'less'] | None

Alternative hypothesis:

  • "two-sided": the means are unequal,
  • "greater": the mean in the treatment variant is greater than the mean in the control variant,
  • "less": the mean in the treatment variant is less than the mean in the control variant.
None
confidence_level float | None

Confidence level for the confidence interval.

None
equal_var bool | None

Defines whether equal variance is assumed. If True, pooled variance is used for the calculation of the standard error of the difference between two means.

None
use_t bool | None

Defines whether to use the Student's t-distribution (True) or the Normal distribution (False).

None
alpha float | None

Significance level. Only for the analysis of power.

None
ratio float | int | None

Ratio of the number of observations in the treatment relative to the control. Only for the analysis of power.

None
power float | None

Statistical power. Only for the analysis of power.

None
effect_size float | int | Sequence[float | int] | None

Absolute effect size. Difference between the two means. Only for the analysis of power.

None
rel_effect_size float | Sequence[float] | None

Relative effect size. Difference between the two means, divided by the control mean. Only for the analysis of power.

None
n_obs int | Sequence[int] | None

Number of observations in the control and in the treatment together. Only for the analysis of power.

None
Parameter defaults

Defaults for parameters alpha, alternative, confidence_level, equal_var, n_obs, power, ratio, and use_t can be changed using the config_context and set_context functions. See the Global configuration reference for details.

References

  • Deng, A., Knoblich, U., & Lu, J. (2018). Applying the Delta Method in Metric Analytics: A Practical Guide with Novel Ideas.
  • Deng, A., Xu, Y., Kohavi, R., & Walker, T. (2013). Improving the Sensitivity of Online Controlled Experiments by Utilizing Pre-Experiment Data.

Examples:

>>> import tea_tasting as tt

>>> experiment = tt.Experiment(
...     orders_per_user=tt.Mean("orders"),
...     revenue_per_user=tt.Mean("revenue"),
... )
>>> data = tt.make_users_data(seed=42)
>>> result = experiment.analyze(data)
>>> print(result)
          metric control treatment rel_effect_size rel_effect_size_ci pvalue
 orders_per_user   0.530     0.573            8.0%       [-2.0%, 19%]  0.118
revenue_per_user    5.24      5.73            9.3%       [-2.4%, 22%]  0.123

With CUPED:

>>> experiment = tt.Experiment(
...     orders_per_user=tt.Mean("orders", "orders_covariate"),
...     revenue_per_user=tt.Mean("revenue", "revenue_covariate"),
... )
>>> data = tt.make_users_data(seed=42, covariates=True)
>>> result = experiment.analyze(data)
>>> print(result)
          metric control treatment rel_effect_size rel_effect_size_ci  pvalue
 orders_per_user   0.523     0.581             11%        [2.9%, 20%] 0.00733
revenue_per_user    5.12      5.85             14%        [3.8%, 26%] 0.00674

Power analysis:

>>> data = tt.make_users_data(
...     seed=42,
...     sessions_uplift=0,
...     orders_uplift=0,
...     revenue_uplift=0,
...     covariates=True,
... )
>>> orders_per_user = tt.Mean(
...     "orders",
...     "orders_covariate",
...     n_obs=(10_000, 20_000),
... )
>>> # Solve for effect size.
>>> print(orders_per_user.solve_power(data))
power effect_size rel_effect_size n_obs
  80%      0.0374            7.2% 10000
  80%      0.0264            5.1% 20000

>>> orders_per_user = tt.Mean(
...     "orders",
...     "orders_covariate",
...     rel_effect_size=0.05,
... )
>>> # Solve for the total number of observations.
>>> print(orders_per_user.solve_power(data, "n_obs"))
power effect_size rel_effect_size n_obs
  80%      0.0260            5.0% 20733

>>> orders_per_user = tt.Mean(
...     "orders",
...     "orders_covariate",
...     rel_effect_size=0.1,
... )
>>> # Solve for power. Infer number of observations from the sample.
>>> print(orders_per_user.solve_power(data, "power"))
power effect_size rel_effect_size n_obs
  69%      0.0519             10%  4000
Source code in src/tea_tasting/metrics/mean.py
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
def __init__(  # noqa: PLR0913
    self,
    value: str,
    covariate: str | None = None,
    *,
    alternative: Literal["two-sided", "greater", "less"] | None = None,
    confidence_level: float | None = None,
    equal_var: bool | None = None,
    use_t: bool | None = None,
    alpha: float | None = None,
    ratio: float | int | None = None,
    power: float | None = None,
    effect_size: float | int | Sequence[float | int] | None = None,
    rel_effect_size: float | Sequence[float] | None = None,
    n_obs: int | Sequence[int] | None = None,
) -> None:
    """Metric for the analysis of means.

    Args:
        value: Metric value column name.
        covariate: Metric covariate column name.
        alternative: Alternative hypothesis:

            - `"two-sided"`: the means are unequal,
            - `"greater"`: the mean in the treatment variant is greater than the mean
                in the control variant,
            - `"less"`: the mean in the treatment variant is less than the mean
                in the control variant.

        confidence_level: Confidence level for the confidence interval.
        equal_var: Defines whether equal variance is assumed. If `True`,
            pooled variance is used for the calculation of the standard error
            of the difference between two means.
        use_t: Defines whether to use the Student's t-distribution (`True`) or
            the Normal distribution (`False`).
        alpha: Significance level. Only for the analysis of power.
        ratio: Ratio of the number of observations in the treatment
            relative to the control. Only for the analysis of power.
        power: Statistical power. Only for the analysis of power.
        effect_size: Absolute effect size. Difference between the two means.
            Only for the analysis of power.
        rel_effect_size: Relative effect size. Difference between the two means,
            divided by the control mean. Only for the analysis of power.
        n_obs: Number of observations in the control and in the treatment together.
            Only for the analysis of power.

    Parameter defaults:
        Defaults for parameters `alpha`, `alternative`, `confidence_level`,
        `equal_var`, `n_obs`, `power`, `ratio`, and `use_t` can be changed
        using the `config_context` and `set_context` functions.
        See the [Global configuration](https://tea-tasting.e10v.me/api/config/)
        reference for details.

    References:
        - [Deng, A., Knoblich, U., & Lu, J. (2018). Applying the Delta Method in Metric Analytics: A Practical Guide with Novel Ideas](https://alexdeng.github.io/public/files/kdd2018-dm.pdf).
        - [Deng, A., Xu, Y., Kohavi, R., & Walker, T. (2013). Improving the Sensitivity of Online Controlled Experiments by Utilizing Pre-Experiment Data](https://exp-platform.com/Documents/2013-02-CUPED-ImprovingSensitivityOfControlledExperiments.pdf).

    Examples:
        ```pycon
        >>> import tea_tasting as tt

        >>> experiment = tt.Experiment(
        ...     orders_per_user=tt.Mean("orders"),
        ...     revenue_per_user=tt.Mean("revenue"),
        ... )
        >>> data = tt.make_users_data(seed=42)
        >>> result = experiment.analyze(data)
        >>> print(result)
                  metric control treatment rel_effect_size rel_effect_size_ci pvalue
         orders_per_user   0.530     0.573            8.0%       [-2.0%, 19%]  0.118
        revenue_per_user    5.24      5.73            9.3%       [-2.4%, 22%]  0.123

        ```

        With CUPED:

        ```pycon
        >>> experiment = tt.Experiment(
        ...     orders_per_user=tt.Mean("orders", "orders_covariate"),
        ...     revenue_per_user=tt.Mean("revenue", "revenue_covariate"),
        ... )
        >>> data = tt.make_users_data(seed=42, covariates=True)
        >>> result = experiment.analyze(data)
        >>> print(result)
                  metric control treatment rel_effect_size rel_effect_size_ci  pvalue
         orders_per_user   0.523     0.581             11%        [2.9%, 20%] 0.00733
        revenue_per_user    5.12      5.85             14%        [3.8%, 26%] 0.00674

        ```

        Power analysis:

        ```pycon
        >>> data = tt.make_users_data(
        ...     seed=42,
        ...     sessions_uplift=0,
        ...     orders_uplift=0,
        ...     revenue_uplift=0,
        ...     covariates=True,
        ... )
        >>> orders_per_user = tt.Mean(
        ...     "orders",
        ...     "orders_covariate",
        ...     n_obs=(10_000, 20_000),
        ... )
        >>> # Solve for effect size.
        >>> print(orders_per_user.solve_power(data))
        power effect_size rel_effect_size n_obs
          80%      0.0374            7.2% 10000
          80%      0.0264            5.1% 20000

        >>> orders_per_user = tt.Mean(
        ...     "orders",
        ...     "orders_covariate",
        ...     rel_effect_size=0.05,
        ... )
        >>> # Solve for the total number of observations.
        >>> print(orders_per_user.solve_power(data, "n_obs"))
        power effect_size rel_effect_size n_obs
          80%      0.0260            5.0% 20733

        >>> orders_per_user = tt.Mean(
        ...     "orders",
        ...     "orders_covariate",
        ...     rel_effect_size=0.1,
        ... )
        >>> # Solve for power. Infer number of observations from the sample.
        >>> print(orders_per_user.solve_power(data, "power"))
        power effect_size rel_effect_size n_obs
          69%      0.0519             10%  4000

        ```
    """  # noqa: E501
    # Delegate to RatioOfMeans with no denominator columns (denom=None,
    # denom_covariate=None): a plain mean is treated as the special case
    # of a ratio of means without a denominator.
    super().__init__(
        numer=value,
        denom=None,
        numer_covariate=covariate,
        denom_covariate=None,
        alternative=alternative,
        confidence_level=confidence_level,
        equal_var=equal_var,
        use_t=use_t,
        alpha=alpha,
        ratio=ratio,
        power=power,
        effect_size=effect_size,
        rel_effect_size=rel_effect_size,
        n_obs=n_obs,
    )
    # Also keep the user-facing argument names; the base class stores the
    # same columns under `numer` and `numer_covariate`.
    self.value = value
    self.covariate = covariate

aggr_cols: AggrCols property #

Columns to be aggregated for a metric analysis.

analyze(data, control, treatment, variant=None) #

Analyze a metric in an experiment.

Parameters:

Name Type Description Default
data IntoFrame | Table | dict[Any, Aggregates]

Experimental data.

required
control Any

Control variant.

required
treatment Any

Treatment variant.

required
variant str | None

Variant column name.

None

Returns:

Type Description
R

Analysis result.

Source code in src/tea_tasting/metrics/base.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def analyze(
    self,
    data: narwhals.typing.IntoFrame | ibis.expr.types.Table | dict[
        Any, tea_tasting.aggr.Aggregates],
    control: Any,
    treatment: Any,
    variant: str | None = None,
) -> R:
    """Analyze a metric in an experiment.

    Args:
        data: Experimental data.
        control: Control variant.
        treatment: Treatment variant.
        variant: Variant column name.

    Returns:
        Analysis result.
    """
    # Collapse raw data into per-variant aggregated statistics first,
    # then run the analysis on the two variants of interest.
    per_variant = aggregate_by_variants(
        data,
        aggr_cols=self.aggr_cols,
        variant=variant,
    )
    control_aggr = per_variant[control]
    treatment_aggr = per_variant[treatment]
    return self.analyze_aggregates(
        control=control_aggr,
        treatment=treatment_aggr,
    )

analyze_aggregates(control, treatment) #

Analyze a metric in an experiment using aggregated statistics.

Parameters:

Name Type Description Default
control Aggregates

Control data.

required
treatment Aggregates

Treatment data.

required

Returns:

Type Description
MeanResult

Analysis result.

Source code in src/tea_tasting/metrics/mean.py
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
def analyze_aggregates(
    self,
    control: tea_tasting.aggr.Aggregates,
    treatment: tea_tasting.aggr.Aggregates,
) -> MeanResult:
    """Analyze a metric in an experiment using aggregated statistics.

    Args:
        control: Control data.
        treatment: Treatment data.

    Returns:
        Analysis result.
    """
    # with_zero_div() presumably returns aggregates that tolerate zero
    # denominators in the ratio calculations -- confirm in aggr module.
    control = control.with_zero_div()
    treatment = treatment.with_zero_div()
    # The covariate (CUPED) coefficient and the covariate mean are
    # estimated on the pooled sample of both variants.
    pooled = control + treatment
    coef = self._covariate_coef(pooled)
    cov_mean = pooled.mean(self.numer_covariate) / pooled.mean(
        self.denom_covariate)
    return self._analyze_stats(
        contr_mean=self._metric_mean(control, coef, cov_mean),
        contr_var=self._metric_var(control, coef),
        contr_count=control.count(),
        treat_mean=self._metric_mean(treatment, coef, cov_mean),
        treat_var=self._metric_var(treatment, coef),
        treat_count=treatment.count(),
    )

solve_power(data, parameter='rel_effect_size') #

Solve for a parameter of the power of a test.

Parameters:

Name Type Description Default
data IntoFrame | Table | Aggregates

Sample data.

required
parameter Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']

Parameter name.

'rel_effect_size'

Returns:

Type Description
S

Power analysis result.

Source code in src/tea_tasting/metrics/base.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def solve_power(
    self,
    data: (
        narwhals.typing.IntoFrame |
        ibis.expr.types.Table |
        tea_tasting.aggr.Aggregates
    ),
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> S:
    """Solve for a parameter of the power of a test.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    # Accept either pre-aggregated statistics or raw data; raw data is
    # aggregated over the whole sample (no grouping column).
    if isinstance(data, tea_tasting.aggr.Aggregates):
        aggregates = data
    else:
        aggregates = tea_tasting.aggr.read_aggregates(
            data=data,
            group_col=None,
            **self.aggr_cols._asdict(),
        )
    return self.solve_power_from_aggregates(data=aggregates, parameter=parameter)

solve_power_from_aggregates(data, parameter='rel_effect_size') #

Solve for a parameter of the power of a test.

Parameters:

Name Type Description Default
data Aggregates

Sample data.

required
parameter Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']

Parameter name.

'rel_effect_size'

Returns:

Type Description
MeanPowerResults

Power analysis result.

Source code in src/tea_tasting/metrics/mean.py
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
def solve_power_from_aggregates(
    self,
    data: tea_tasting.aggr.Aggregates,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> MeanPowerResults:
    """Solve for a parameter of the power of a test.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    tea_tasting.utils.check_scalar(
        parameter,
        "parameter",
        in_={"power", "effect_size", "rel_effect_size", "n_obs"},
    )

    data = data.with_zero_div()
    covariate_coef = self._covariate_coef(data)
    covariate_mean = data.mean(self.numer_covariate) / data.mean(
        self.denom_covariate)
    metric_mean = self._metric_mean(data, covariate_coef, covariate_mean)

    power, effect_size, rel_effect_size, n_obs = self._validate_power_parameters(
        metric_mean=metric_mean,
        sample_count=data.count(),
        parameter=parameter,
    )

    # One result row per (effect size, sample size) combination; the solved
    # parameter replaces the corresponding field in each row.
    result = MeanPowerResults()
    for size_abs, size_rel in zip(effect_size, rel_effect_size, strict=True):
        for count in n_obs:
            solved = self._solve_power_from_stats(
                sample_var=self._metric_var(data, covariate_coef),
                sample_count=count,
                effect_size=size_abs,
                power=power,
            )
            if parameter == "power":
                row = MeanPowerResult(
                    power=solved,  # type: ignore
                    effect_size=size_abs,
                    rel_effect_size=size_rel,
                    n_obs=count,
                )
            elif parameter == "n_obs":
                # The solver may return a fractional count; round up.
                row = MeanPowerResult(
                    power=power,
                    effect_size=size_abs,
                    rel_effect_size=size_rel,
                    n_obs=math.ceil(solved),  # type: ignore
                )
            else:  # "effect_size" or "rel_effect_size": both derive from solved.
                row = MeanPowerResult(
                    power=power,
                    effect_size=solved,  # type: ignore
                    rel_effect_size=solved / metric_mean,  # type: ignore
                    n_obs=count,
                )
            result.append(row)

    return result

MeanPowerResult #

Bases: NamedTuple

Power analysis results.

Attributes:

Name Type Description
power float

Statistical power.

effect_size float

Absolute effect size. Difference between the two means.

rel_effect_size float

Relative effect size. Difference between the two means, divided by the control mean.

n_obs float

Number of observations in the control and in the treatment together.

MeanResult #

Bases: NamedTuple

Result of the analysis of means.

Attributes:

Name Type Description
control float

Control mean.

treatment float

Treatment mean.

effect_size float

Absolute effect size. Difference between the two means.

effect_size_ci_lower float

Lower bound of the absolute effect size confidence interval.

effect_size_ci_upper float

Upper bound of the absolute effect size confidence interval.

rel_effect_size float

Relative effect size. Difference between the two means, divided by the control mean.

rel_effect_size_ci_lower float

Lower bound of the relative effect size confidence interval.

rel_effect_size_ci_upper float

Upper bound of the relative effect size confidence interval.

pvalue float

P-value.

statistic float

Statistic (standardized effect size).

RatioOfMeans(numer, denom=None, numer_covariate=None, denom_covariate=None, *, alternative=None, confidence_level=None, equal_var=None, use_t=None, alpha=None, ratio=None, power=None, effect_size=None, rel_effect_size=None, n_obs=None) #

Bases: MetricBaseAggregated[MeanResult], PowerBaseAggregated[MeanPowerResults]

Metric for the analysis of ratios of means.

Parameters:

Name Type Description Default
numer str

Numerator column name.

required
denom str | None

Denominator column name.

None
numer_covariate str | None

Covariate numerator column name.

None
denom_covariate str | None

Covariate denominator column name.

None
alternative Literal['two-sided', 'greater', 'less'] | None

Alternative hypothesis:

  • "two-sided": the means are unequal,
  • "greater": the mean in the treatment variant is greater than the mean in the control variant,
  • "less": the mean in the treatment variant is less than the mean in the control variant.
None
confidence_level float | None

Confidence level for the confidence interval.

None
equal_var bool | None

Defines whether equal variance is assumed. If True, pooled variance is used for the calculation of the standard error of the difference between two means.

None
use_t bool | None

Defines whether to use the Student's t-distribution (True) or the Normal distribution (False).

None
alpha float | None

Significance level. Only for the analysis of power.

None
ratio float | int | None

Ratio of the number of observations in the treatment relative to the control. Only for the analysis of power.

None
power float | None

Statistical power. Only for the analysis of power.

None
effect_size float | int | Sequence[float | int] | None

Absolute effect size. Difference between the two means. Only for the analysis of power.

None
rel_effect_size float | Sequence[float] | None

Relative effect size. Difference between the two means, divided by the control mean. Only for the analysis of power.

None
n_obs int | Sequence[int] | None

Number of observations in the control and in the treatment together. Only for the analysis of power.

None
Parameter defaults

Defaults for parameters alpha, alternative, confidence_level, equal_var, n_obs, power, ratio, and use_t can be changed using the config_context and set_context functions. See the Global configuration reference for details.

References

  • Deng, A., Knoblich, U., & Lu, J. (2018). Applying the Delta Method in Metric Analytics: A Practical Guide with Novel Ideas.
  • Deng, A., Xu, Y., Kohavi, R., & Walker, T. (2013). Improving the Sensitivity of Online Controlled Experiments by Utilizing Pre-Experiment Data.

Examples:

>>> import tea_tasting as tt

>>> experiment = tt.Experiment(
...     orders_per_session=tt.RatioOfMeans("orders", "sessions"),
... )
>>> data = tt.make_users_data(seed=42)
>>> result = experiment.analyze(data)
>>> print(result)
            metric control treatment rel_effect_size rel_effect_size_ci pvalue
orders_per_session   0.266     0.289            8.8%      [-0.89%, 19%] 0.0762

With CUPED:

>>> experiment = tt.Experiment(
...     orders_per_session=tt.RatioOfMeans(
...         "orders",
...         "sessions",
...         "orders_covariate",
...         "sessions_covariate",
...     ),
... )
>>> data = tt.make_users_data(seed=42, covariates=True)
>>> result = experiment.analyze(data)
>>> print(result)
            metric control treatment rel_effect_size rel_effect_size_ci  pvalue
orders_per_session   0.262     0.293             12%        [4.2%, 21%] 0.00229

Power analysis:

>>> data = tt.make_users_data(
...     seed=42,
...     sessions_uplift=0,
...     orders_uplift=0,
...     revenue_uplift=0,
...     covariates=True,
... )
>>> orders_per_session = tt.RatioOfMeans(
...     "orders",
...     "sessions",
...     "orders_covariate",
...     "sessions_covariate",
...     n_obs=(10_000, 20_000),
... )
>>> # Solve for effect size.
>>> print(orders_per_session.solve_power(data))
power effect_size rel_effect_size n_obs
  80%      0.0177            6.8% 10000
  80%      0.0125            4.8% 20000

>>> orders_per_session = tt.RatioOfMeans(
...     "orders",
...     "sessions",
...     "orders_covariate",
...     "sessions_covariate",
...     rel_effect_size=0.05,
... )
>>> # Solve for the total number of observations.
>>> print(orders_per_session.solve_power(data, "n_obs"))
power effect_size rel_effect_size n_obs
  80%      0.0130            5.0% 18515

>>> orders_per_session = tt.RatioOfMeans(
...     "orders",
...     "sessions",
...     "orders_covariate",
...     "sessions_covariate",
...     rel_effect_size=0.1,
... )
>>> # Solve for power. Infer number of observations from the sample.
>>> print(orders_per_session.solve_power(data, "power"))
power effect_size rel_effect_size n_obs
  74%      0.0261             10%  4000
Source code in src/tea_tasting/metrics/mean.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def __init__(  # noqa: PLR0913
    self,
    numer: str,
    denom: str | None = None,
    numer_covariate: str | None = None,
    denom_covariate: str | None = None,
    *,
    alternative: Literal["two-sided", "greater", "less"] | None = None,
    confidence_level: float | None = None,
    equal_var: bool | None = None,
    use_t: bool | None = None,
    alpha: float | None = None,
    ratio: float | int | None = None,
    power: float | None = None,
    effect_size: float | int | Sequence[float | int] | None = None,
    rel_effect_size: float | Sequence[float] | None = None,
    n_obs: int | Sequence[int] | None = None,
) -> None:
    """Metric for the analysis of ratios of means.

    Args:
        numer: Numerator column name.
        denom: Denominator column name.
        numer_covariate: Covariate numerator column name.
        denom_covariate: Covariate denominator column name.
        alternative: Alternative hypothesis:

            - `"two-sided"`: the means are unequal,
            - `"greater"`: the mean in the treatment variant is greater than the mean
                in the control variant,
            - `"less"`: the mean in the treatment variant is less than the mean
                in the control variant.

        confidence_level: Confidence level for the confidence interval.
        equal_var: Defines whether equal variance is assumed. If `True`,
            pooled variance is used for the calculation of the standard error
            of the difference between two means.
        use_t: Defines whether to use the Student's t-distribution (`True`) or
            the Normal distribution (`False`).
        alpha: Significance level. Only for the analysis of power.
        ratio: Ratio of the number of observations in the treatment
            relative to the control. Only for the analysis of power.
        power: Statistical power. Only for the analysis of power.
        effect_size: Absolute effect size. Difference between the two means.
            Only for the analysis of power.
        rel_effect_size: Relative effect size. Difference between the two means,
            divided by the control mean. Only for the analysis of power.
        n_obs: Number of observations in the control and in the treatment together.
            Only for the analysis of power.

    Parameter defaults:
        Defaults for parameters `alpha`, `alternative`, `confidence_level`,
        `equal_var`, `n_obs`, `power`, `ratio`, and `use_t` can be changed
        using the `config_context` and `set_context` functions.
        See the [Global configuration](https://tea-tasting.e10v.me/api/config/)
        reference for details.

    References:
        - [Deng, A., Knoblich, U., & Lu, J. (2018). Applying the Delta Method in Metric Analytics: A Practical Guide with Novel Ideas](https://alexdeng.github.io/public/files/kdd2018-dm.pdf).
        - [Deng, A., Xu, Y., Kohavi, R., & Walker, T. (2013). Improving the Sensitivity of Online Controlled Experiments by Utilizing Pre-Experiment Data](https://exp-platform.com/Documents/2013-02-CUPED-ImprovingSensitivityOfControlledExperiments.pdf).

    Examples:
        ```pycon
        >>> import tea_tasting as tt

        >>> experiment = tt.Experiment(
        ...     orders_per_session=tt.RatioOfMeans("orders", "sessions"),
        ... )
        >>> data = tt.make_users_data(seed=42)
        >>> result = experiment.analyze(data)
        >>> print(result)
                    metric control treatment rel_effect_size rel_effect_size_ci pvalue
        orders_per_session   0.266     0.289            8.8%      [-0.89%, 19%] 0.0762

        ```

        With CUPED:

        ```pycon
        >>> experiment = tt.Experiment(
        ...     orders_per_session=tt.RatioOfMeans(
        ...         "orders",
        ...         "sessions",
        ...         "orders_covariate",
        ...         "sessions_covariate",
        ...     ),
        ... )
        >>> data = tt.make_users_data(seed=42, covariates=True)
        >>> result = experiment.analyze(data)
        >>> print(result)
                    metric control treatment rel_effect_size rel_effect_size_ci  pvalue
        orders_per_session   0.262     0.293             12%        [4.2%, 21%] 0.00229

        ```

        Power analysis:

        ```pycon
        >>> data = tt.make_users_data(
        ...     seed=42,
        ...     sessions_uplift=0,
        ...     orders_uplift=0,
        ...     revenue_uplift=0,
        ...     covariates=True,
        ... )
        >>> orders_per_session = tt.RatioOfMeans(
        ...     "orders",
        ...     "sessions",
        ...     "orders_covariate",
        ...     "sessions_covariate",
        ...     n_obs=(10_000, 20_000),
        ... )
        >>> # Solve for effect size.
        >>> print(orders_per_session.solve_power(data))
        power effect_size rel_effect_size n_obs
          80%      0.0177            6.8% 10000
          80%      0.0125            4.8% 20000

        >>> orders_per_session = tt.RatioOfMeans(
        ...     "orders",
        ...     "sessions",
        ...     "orders_covariate",
        ...     "sessions_covariate",
        ...     rel_effect_size=0.05,
        ... )
        >>> # Solve for the total number of observations.
        >>> print(orders_per_session.solve_power(data, "n_obs"))
        power effect_size rel_effect_size n_obs
          80%      0.0130            5.0% 18515

        >>> orders_per_session = tt.RatioOfMeans(
        ...     "orders",
        ...     "sessions",
        ...     "orders_covariate",
        ...     "sessions_covariate",
        ...     rel_effect_size=0.1,
        ... )
        >>> # Solve for power. Infer number of observations from the sample.
        >>> print(orders_per_session.solve_power(data, "power"))
        power effect_size rel_effect_size n_obs
          74%      0.0261             10%  4000

        ```
    """  # noqa: E501
    def checked_effect_size(
        value: float | int | Sequence[float | int] | None,
        name: str,
    ) -> float | int | Sequence[float | int] | None:
        # Validate that the effect size -- or, for a sequence, each of its
        # items -- is a finite, nonzero number. `None` means "not set".
        if value is None:
            return None
        items = value if isinstance(value, Sequence) else (value,)
        for item in items:
            tea_tasting.utils.check_scalar(
                item, name, typ=float | int,
                gt=float("-inf"), lt=float("inf"), ne=0,
            )
        return value

    # Column names are validated eagerly; only the numerator is required.
    self.numer = tea_tasting.utils.check_scalar(numer, "numer", typ=str)
    self.denom = tea_tasting.utils.check_scalar(denom, "denom", typ=str | None)
    self.numer_covariate = tea_tasting.utils.check_scalar(
        numer_covariate, "numer_covariate", typ=str | None)
    self.denom_covariate = tea_tasting.utils.check_scalar(
        denom_covariate, "denom_covariate", typ=str | None)
    # Parameters below share the same contract: an explicit argument is
    # validated with auto_check; `None` falls back to the global configuration.
    for name, value in (
        ("alternative", alternative),
        ("confidence_level", confidence_level),
        ("equal_var", equal_var),
        ("use_t", use_t),
        ("alpha", alpha),
        ("ratio", ratio),
        ("power", power),
        ("n_obs", n_obs),
    ):
        setattr(self, name, (
            tea_tasting.utils.auto_check(value, name)
            if value is not None
            else tea_tasting.config.get_config(name)
        ))
    # The two effect-size parameters are mutually exclusive.
    if effect_size is not None and rel_effect_size is not None:
        raise ValueError(
            "Both `effect_size` and `rel_effect_size` are not `None`. "
            "Only one of them should be defined.",
        )
    self.effect_size = checked_effect_size(effect_size, "effect_size")
    self.rel_effect_size = checked_effect_size(rel_effect_size, "rel_effect_size")

aggr_cols: AggrCols property #

Columns to be aggregated for a metric analysis.

analyze(data, control, treatment, variant=None) #

Analyze a metric in an experiment.

Parameters:

Name Type Description Default
data IntoFrame | Table | dict[Any, Aggregates]

Experimental data.

required
control Any

Control variant.

required
treatment Any

Treatment variant.

required
variant str | None

Variant column name.

None

Returns:

Type Description
R

Analysis result.

Source code in src/tea_tasting/metrics/base.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def analyze(
    self,
    data: narwhals.typing.IntoFrame | ibis.expr.types.Table | dict[
        Any, tea_tasting.aggr.Aggregates],
    control: Any,
    treatment: Any,
    variant: str | None = None,
) -> R:
    """Analyze a metric in an experiment.

    Args:
        data: Experimental data.
        control: Control variant.
        treatment: Treatment variant.
        variant: Variant column name.

    Returns:
        Analysis result.
    """
    # Reduce the raw data to per-variant aggregates, then delegate the
    # statistical analysis to analyze_aggregates.
    variant_aggregates = aggregate_by_variants(
        data,
        aggr_cols=self.aggr_cols,
        variant=variant,
    )
    control_aggr = variant_aggregates[control]
    treatment_aggr = variant_aggregates[treatment]
    return self.analyze_aggregates(
        control=control_aggr,
        treatment=treatment_aggr,
    )

analyze_aggregates(control, treatment) #

Analyze a metric in an experiment using aggregated statistics.

Parameters:

Name Type Description Default
control Aggregates

Control data.

required
treatment Aggregates

Treatment data.

required

Returns:

Type Description
MeanResult

Analysis result.

Source code in src/tea_tasting/metrics/mean.py
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
def analyze_aggregates(
    self,
    control: tea_tasting.aggr.Aggregates,
    treatment: tea_tasting.aggr.Aggregates,
) -> MeanResult:
    """Analyze a metric in an experiment using aggregated statistics.

    Args:
        control: Control data.
        treatment: Treatment data.

    Returns:
        Analysis result.
    """
    control = control.with_zero_div()
    treatment = treatment.with_zero_div()
    # The covariate adjustment (CUPED-style coefficient and mean) is
    # estimated on the pooled data of both variants.
    pooled = control + treatment
    coef = self._covariate_coef(pooled)
    cov_mean = pooled.mean(self.numer_covariate) / pooled.mean(
        self.denom_covariate)
    contr_stats = {
        "contr_mean": self._metric_mean(control, coef, cov_mean),
        "contr_var": self._metric_var(control, coef),
        "contr_count": control.count(),
    }
    treat_stats = {
        "treat_mean": self._metric_mean(treatment, coef, cov_mean),
        "treat_var": self._metric_var(treatment, coef),
        "treat_count": treatment.count(),
    }
    return self._analyze_stats(**contr_stats, **treat_stats)

solve_power(data, parameter='rel_effect_size') #

Solve for a parameter of the power of a test.

Parameters:

Name Type Description Default
data IntoFrame | Table | Aggregates

Sample data.

required
parameter Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']

Parameter name.

'rel_effect_size'

Returns:

Type Description
S

Power analysis result.

Source code in src/tea_tasting/metrics/base.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def solve_power(
    self,
    data: (
        narwhals.typing.IntoFrame |
        ibis.expr.types.Table |
        tea_tasting.aggr.Aggregates
    ),
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> S:
    """Solve for a parameter of the power of a test.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    # Already-aggregated data is used as-is; raw frames/tables are
    # aggregated first (no grouping column — a single overall sample).
    if isinstance(data, tea_tasting.aggr.Aggregates):
        aggregates = data
    else:
        aggregates = tea_tasting.aggr.read_aggregates(
            data=data,
            group_col=None,
            **self.aggr_cols._asdict(),
        )
    return self.solve_power_from_aggregates(data=aggregates, parameter=parameter)

solve_power_from_aggregates(data, parameter='rel_effect_size') #

Solve for a parameter of the power of a test.

Parameters:

Name Type Description Default
data Aggregates

Sample data.

required
parameter Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']

Parameter name.

'rel_effect_size'

Returns:

Type Description
MeanPowerResults

Power analysis result.

Source code in src/tea_tasting/metrics/mean.py
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
def solve_power_from_aggregates(
    self,
    data: tea_tasting.aggr.Aggregates,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> MeanPowerResults:
    """Solve for a parameter of the power of a test.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    tea_tasting.utils.check_scalar(
        parameter,
        "parameter",
        in_={"power", "effect_size", "rel_effect_size", "n_obs"},
    )

    data = data.with_zero_div()
    coef = self._covariate_coef(data)
    cov_mean = data.mean(self.numer_covariate) / data.mean(
        self.denom_covariate)
    sample_mean = self._metric_mean(data, coef, cov_mean)

    power, abs_effects, rel_effects, obs_counts = self._validate_power_parameters(
        metric_mean=sample_mean,
        sample_count=data.count(),
        parameter=parameter,
    )

    results = MeanPowerResults()
    # One result per combination of (effect size pair, observation count).
    for abs_effect, rel_effect in zip(abs_effects, rel_effects, strict=True):
        for obs_count in obs_counts:
            solved = self._solve_power_from_stats(
                sample_var=self._metric_var(data, coef),
                sample_count=obs_count,
                effect_size=abs_effect,
                power=power,
            )
            # The solved value replaces the slot named by `parameter`;
            # every other field keeps the value it was solved with.
            solves_effect = parameter in {"effect_size", "rel_effect_size"}
            results.append(MeanPowerResult(
                power=solved if parameter == "power" else power,  # type: ignore
                effect_size=solved if solves_effect else abs_effect,  # type: ignore
                rel_effect_size=(
                    solved / sample_mean
                    if solves_effect
                    else rel_effect
                ),  # type: ignore
                n_obs=(
                    math.ceil(solved)
                    if parameter == "n_obs"
                    else obs_count
                ),  # type: ignore
            ))

    return results