Skip to content

tea_tasting.experiment #

Experiment and experiment result.

Experiment(metrics=None, variant='variant', **kw_metrics) #

Bases: ReprMixin

Experiment definition: metrics and variant column.

Parameters:

Name Type Description Default
metrics dict[str, MetricBase[Any]] | None

Dictionary of metrics with metric names as keys.

None
variant str

Variant column name.

'variant'
kw_metrics MetricBase[Any]

Metrics with metric names as parameter names.

{}

Examples:

>>> import tea_tasting as tt

>>> experiment = tt.Experiment(
...     sessions_per_user=tt.Mean("sessions"),
...     orders_per_session=tt.RatioOfMeans("orders", "sessions"),
...     orders_per_user=tt.Mean("orders"),
...     revenue_per_user=tt.Mean("revenue"),
... )
>>> data = tt.make_users_data(seed=42)
>>> result = experiment.analyze(data)
>>> print(result)
            metric control treatment rel_effect_size rel_effect_size_ci pvalue
 sessions_per_user    2.00      1.98          -0.66%      [-3.7%, 2.5%]  0.674
orders_per_session   0.266     0.289            8.8%      [-0.89%, 19%] 0.0762
   orders_per_user   0.530     0.573            8.0%       [-2.0%, 19%]  0.118
  revenue_per_user    5.24      5.73            9.3%       [-2.4%, 22%]  0.123

Using the first argument, metrics, which accepts metrics in the form of a dictionary:

>>> experiment = tt.Experiment({
...     "sessions per user": tt.Mean("sessions"),
...     "orders per session": tt.RatioOfMeans("orders", "sessions"),
...     "orders per user": tt.Mean("orders"),
...     "revenue per user": tt.Mean("revenue"),
... })
>>> data = tt.make_users_data(seed=42)
>>> result = experiment.analyze(data)
>>> print(result)
            metric control treatment rel_effect_size rel_effect_size_ci pvalue
 sessions per user    2.00      1.98          -0.66%      [-3.7%, 2.5%]  0.674
orders per session   0.266     0.289            8.8%      [-0.89%, 19%] 0.0762
   orders per user   0.530     0.573            8.0%       [-2.0%, 19%]  0.118
  revenue per user    5.24      5.73            9.3%       [-2.4%, 22%]  0.123

Power analysis:

>>> data = tt.make_users_data(
...     seed=42,
...     sessions_uplift=0,
...     orders_uplift=0,
...     revenue_uplift=0,
...     covariates=True,
... )
>>> with tt.config_context(n_obs=(10_000, 20_000)):
...     experiment = tt.Experiment(
...         sessions_per_user=tt.Mean("sessions", "sessions_covariate"),
...         orders_per_session=tt.RatioOfMeans(
...             numer="orders",
...             denom="sessions",
...             numer_covariate="orders_covariate",
...             denom_covariate="sessions_covariate",
...         ),
...         orders_per_user=tt.Mean("orders", "orders_covariate"),
...         revenue_per_user=tt.Mean("revenue", "revenue_covariate"),
...     )
>>> power_result = experiment.solve_power(data)
>>> print(power_result)
            metric power effect_size rel_effect_size n_obs
 sessions_per_user   80%      0.0458            2.3% 10000
 sessions_per_user   80%      0.0324            1.6% 20000
orders_per_session   80%      0.0177            6.8% 10000
orders_per_session   80%      0.0125            4.8% 20000
   orders_per_user   80%      0.0374            7.2% 10000
   orders_per_user   80%      0.0264            5.1% 20000
  revenue_per_user   80%       0.488            9.2% 10000
  revenue_per_user   80%       0.345            6.5% 20000
Source code in src/tea_tasting/experiment.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def __init__(
    self,
    metrics: dict[str, tea_tasting.metrics.MetricBase[Any]] | None = None,
    variant: str = "variant",
    **kw_metrics: tea_tasting.metrics.MetricBase[Any],
) -> None:
    """Define an experiment: its metrics and the variant column.

    Metrics given in the `metrics` dictionary and metrics given as keyword
    arguments are merged into one dictionary, which is then validated with
    `tea_tasting.utils.check_scalar` and stored in `self.metrics`.

    Args:
        metrics: Dictionary of metrics with metric names as keys.
        variant: Variant column name.
        kw_metrics: Metrics with metric names as parameter names.

    Examples:
        ```pycon
        >>> import tea_tasting as tt

        >>> experiment = tt.Experiment(
        ...     sessions_per_user=tt.Mean("sessions"),
        ...     orders_per_session=tt.RatioOfMeans("orders", "sessions"),
        ...     orders_per_user=tt.Mean("orders"),
        ...     revenue_per_user=tt.Mean("revenue"),
        ... )
        >>> data = tt.make_users_data(seed=42)
        >>> result = experiment.analyze(data)
        >>> print(result)
                    metric control treatment rel_effect_size rel_effect_size_ci pvalue
         sessions_per_user    2.00      1.98          -0.66%      [-3.7%, 2.5%]  0.674
        orders_per_session   0.266     0.289            8.8%      [-0.89%, 19%] 0.0762
           orders_per_user   0.530     0.573            8.0%       [-2.0%, 19%]  0.118
          revenue_per_user    5.24      5.73            9.3%       [-2.4%, 22%]  0.123

        ```

        Using the first argument `metrics` which accepts metrics in a form of dictionary:

        ```pycon
        >>> experiment = tt.Experiment({
        ...     "sessions per user": tt.Mean("sessions"),
        ...     "orders per session": tt.RatioOfMeans("orders", "sessions"),
        ...     "orders per user": tt.Mean("orders"),
        ...     "revenue per user": tt.Mean("revenue"),
        ... })
        >>> data = tt.make_users_data(seed=42)
        >>> result = experiment.analyze(data)
        >>> print(result)
                    metric control treatment rel_effect_size rel_effect_size_ci pvalue
         sessions per user    2.00      1.98          -0.66%      [-3.7%, 2.5%]  0.674
        orders per session   0.266     0.289            8.8%      [-0.89%, 19%] 0.0762
           orders per user   0.530     0.573            8.0%       [-2.0%, 19%]  0.118
          revenue per user    5.24      5.73            9.3%       [-2.4%, 22%]  0.123

        ```

        Power analysis:

        ```pycon
        >>> data = tt.make_users_data(
        ...     seed=42,
        ...     sessions_uplift=0,
        ...     orders_uplift=0,
        ...     revenue_uplift=0,
        ...     covariates=True,
        ... )
        >>> with tt.config_context(n_obs=(10_000, 20_000)):
        ...     experiment = tt.Experiment(
        ...         sessions_per_user=tt.Mean("sessions", "sessions_covariate"),
        ...         orders_per_session=tt.RatioOfMeans(
        ...             numer="orders",
        ...             denom="sessions",
        ...             numer_covariate="orders_covariate",
        ...             denom_covariate="sessions_covariate",
        ...         ),
        ...         orders_per_user=tt.Mean("orders", "orders_covariate"),
        ...         revenue_per_user=tt.Mean("revenue", "revenue_covariate"),
        ...     )
        >>> power_result = experiment.solve_power(data)
        >>> print(power_result)
                    metric power effect_size rel_effect_size n_obs
         sessions_per_user   80%      0.0458            2.3% 10000
         sessions_per_user   80%      0.0324            1.6% 20000
        orders_per_session   80%      0.0177            6.8% 10000
        orders_per_session   80%      0.0125            4.8% 20000
           orders_per_user   80%      0.0374            7.2% 10000
           orders_per_user   80%      0.0264            5.1% 20000
          revenue_per_user   80%       0.488            9.2% 10000
          revenue_per_user   80%       0.345            6.5% 20000

        ```
    """  # noqa: E501
    # Merge both sources of metrics. With the `|` operator the right-hand
    # operand wins, so a keyword metric overrides a same-named dictionary entry.
    combined = ({} if metrics is None else metrics) | kw_metrics

    tea_tasting.utils.check_scalar(combined, "metrics", typ=dict)
    tea_tasting.utils.check_scalar(len(combined), "len(metrics)", gt=0)
    for metric_name, metric_obj in combined.items():
        tea_tasting.utils.check_scalar(metric_name, "metric_name", typ=str)
        tea_tasting.utils.check_scalar(
            metric_obj, metric_name, typ=tea_tasting.metrics.MetricBase)

    self.metrics = combined
    self.variant = tea_tasting.utils.check_scalar(
        variant, "variant", typ=str)

analyze(data, control=None, *, all_variants=False) #

Analyze the experiment.

Parameters:

Name Type Description Default
data IntoFrame | Table

Experimental data.

required
control Any

Control variant. If None, the variant with the minimal ID is used as a control.

None
all_variants bool

If True, analyze all pairs of variants. Otherwise, analyze only one pair of variants.

False

Returns:

Type Description
ExperimentResult | ExperimentResults

Experiment result.

Source code in src/tea_tasting/experiment.py
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
def analyze(
    self,
    data: narwhals.typing.IntoFrame | ibis.expr.types.Table,
    control: Any = None,
    *,
    all_variants: bool = False,
) -> ExperimentResult | ExperimentResults:
    """Analyze the experiment.

    Args:
        data: Experimental data.
        control: Control variant. If `None`, the variant with the minimal ID
            is used as a control.
        all_variants: If `True`, analyze all pairs of variants. Otherwise,
            analyze only one pair of variants.

    Returns:
        Experiment result. If `all_variants` is `False`, the result for
        the single pair of variants. Otherwise, a mapping from
        `(control, treatment)` pairs to the results for each pair.

    Raises:
        ValueError: If `all_variants` is `False` and the number of
            variant pairs to compare is not exactly one.
    """
    aggregated_data, granular_data = self._read_data(data)

    # Take the variant IDs from whichever pre-read data is available;
    # otherwise read them from the data directly.
    if aggregated_data is not None:
        variants = aggregated_data.keys()
    elif granular_data is not None:
        variants = granular_data.keys()
    else:
        variants = self._read_variants(data)
    variants = sorted(variants)

    if control is not None:
        # Compare every other variant against the explicitly given control.
        variant_pairs = tuple(
            (control, treatment)
            for treatment in variants
            if treatment != control
        )
    else:
        # No explicit control: in each pair, the variant with the smaller ID
        # acts as the control.
        variant_pairs = tuple(
            (control, treatment)
            for control in variants
            for treatment in variants
            if control < treatment
        )

    if not all_variants:
        # Report the zero-pair case separately: previously it was reported
        # with the misleading "more than one pair" message.
        if not variant_pairs:
            raise ValueError(
                "all_variants is False, but there are no pairs of variants "
                "to compare.")
        if len(variant_pairs) > 1:
            raise ValueError(
                "all_variants is False, but there are more than one pair of variants.")

    results = ExperimentResults()
    for contr, treat in variant_pairs:
        result = ExperimentResult()
        for name, metric in self.metrics.items():
            result |= {name: self._analyze_metric(
                metric=metric,
                data=data,
                aggregated_data=aggregated_data,
                granular_data=granular_data,
                control=contr,
                treatment=treat,
            )}

        # Exactly one pair exists here when all_variants is False,
        # so its result is returned directly.
        if not all_variants:
            return result

        results |= {(contr, treat): result}

    return results

solve_power(data, parameter='rel_effect_size') #

Solve for a parameter of the power of a test.

Parameters:

Name Type Description Default
data IntoFrame | Table

Sample data.

required
parameter Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']

Parameter name.

'rel_effect_size'

Returns:

Type Description
ExperimentPowerResult

Power analysis result.

Source code in src/tea_tasting/experiment.py
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
def solve_power(
    self,
    data: narwhals.typing.IntoFrame | ibis.expr.types.Table,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> ExperimentPowerResult:
    """Solve for a parameter of the power of a test.

    Metrics that are instances of `PowerBaseAggregated` are solved on
    aggregates read once for all of them; metrics that are instances of
    `PowerBase` are solved on `data` directly. Other metrics are skipped.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    # Union of the aggregate columns requested by all aggregated-power metrics.
    required_cols = tea_tasting.metrics.AggrCols()
    for metric in self.metrics.values():
        if isinstance(metric, tea_tasting.metrics.PowerBaseAggregated):
            required_cols |= metric.aggr_cols

    # Read the aggregates only when at least one column is requested.
    if len(required_cols) > 0:
        aggregates = tea_tasting.aggr.read_aggregates(
            data,
            group_col=None,
            **required_cols._asdict(),
        )
    else:
        aggregates = tea_tasting.aggr.Aggregates()

    power_result = ExperimentPowerResult()
    for name, metric in self.metrics.items():
        if isinstance(metric, tea_tasting.metrics.PowerBaseAggregated):
            solved = metric.solve_power(aggregates, parameter=parameter)
        elif isinstance(metric, tea_tasting.metrics.PowerBase):
            solved = metric.solve_power(data, parameter=parameter)
        else:
            continue
        power_result |= {name: solved}

    return power_result

ExperimentPowerResult #

Bases: UserDict[str, MetricPowerResults[Any]], DictsReprMixin

Result of the analysis of power in an experiment.

to_arrow() #

Convert the object to a PyArrow Table.

Source code in src/tea_tasting/utils.py
247
248
249
def to_arrow(self) -> pa.Table:
    """Convert the object to a PyArrow Table.

    Returns:
        A PyArrow Table built from the dictionaries returned by `to_dicts()`.
    """
    records = self.to_dicts()
    return pa.Table.from_pylist(records)

to_dicts() #

Convert the result to a sequence of dictionaries.

Source code in src/tea_tasting/experiment.py
114
115
116
117
118
119
def to_dicts(self) -> tuple[dict[str, Any], ...]:
    """Convert the result to a sequence of dictionaries.

    Returns:
        A tuple with one dictionary per item of each metric's `to_dicts()`
        output, in iteration order. Each dictionary starts with
        a `"metric"` key holding the metric name.
    """
    # Build the tuple in a single pass instead of re-creating it
    # on every iteration of the loop.
    return tuple(
        {"metric": metric} | d
        for metric, results in self.items()
        for d in results.to_dicts()
    )

to_html(keys=None, formatter=get_and_format_num, *, indent=None) #

Convert the object to HTML.

Default formatting rules:

  • If a name starts with "rel_" or is equal to "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100 and add "%".
  • Round other values to 3 significant digits.
  • If a value is less than 0.001 or is greater than or equal to 10_000_000, format it in exponential notation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num
indent str | None

Whitespace to insert for each indentation level. If None, do not indent.

None

Returns:

Type Description
str

A table with results rendered as HTML.

Source code in src/tea_tasting/utils.py
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
def to_html(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
    *,
    indent: str | None = None,
) -> str:
    """Render the object as an HTML table.

    Default formatting rules:

    - If a name starts with `"rel_"` or is equal to `"power"`, consider it
        a percentage value. Round percentage values to 2 significant digits,
        multiply by `100` and add `"%"`.
    - Round other values to 3 significant digits.
    - If a value is less than `0.001` or is greater than or equal to
        `10_000_000`, format it in exponential notation.
    - If a name ends with `"_ci"`, consider it a confidence interval.
        Look up the attributes `"{name}_lower"` and `"{name}_upper"`,
        and format the interval as `"[{lower_bound}, {upper_bound}]"`.

    Args:
        keys: Keys to convert. If a key is not defined in the dictionary
            it's assumed to be `None`. If `keys` is `None`,
            `self.default_keys` is used.
        formatter: Custom formatter function. It should accept a dictionary
            of metric result attributes and an attribute name, and return
            a formatted attribute value.
        indent: Whitespace to insert for each indentation level. If `None`,
            do not indent.

    Returns:
        A table with results rendered as HTML.
    """
    columns = self.default_keys if keys is None else keys

    root = ET.Element(
        "table",
        {"class": "dataframe", "style": "text-align: right;"},
    )

    # Header: a single row with one cell per requested key.
    header_row = ET.SubElement(ET.SubElement(root, "thead"), "tr")
    for column in columns:
        ET.SubElement(header_row, "th").text = column

    # Body: one row per dictionary, each cell produced by the formatter.
    body = ET.SubElement(root, "tbody")
    for record in self.to_dicts():
        row = ET.SubElement(body, "tr")
        for column in columns:
            ET.SubElement(row, "td").text = formatter(record, column)

    if indent is not None:
        ET.indent(root, space=indent)
    return ET.tostring(root, encoding="unicode", method="html")

to_pandas() #

Convert the object to a Pandas DataFrame.

Source code in src/tea_tasting/utils.py
251
252
253
254
def to_pandas(self) -> PandasDataFrame:
    """Convert the object to a Pandas DataFrame.

    Pandas is imported inside the method, so it is only required
    when this method is called.

    Returns:
        A Pandas DataFrame built from the dictionaries returned
        by `to_dicts()`.
    """
    import pandas as pd

    records = self.to_dicts()
    return pd.DataFrame.from_records(records)

to_polars() #

Convert the object to a Polars DataFrame.

Source code in src/tea_tasting/utils.py
256
257
258
259
def to_polars(self) -> PolarsDataFrame:
    """Convert the object to a Polars DataFrame.

    Polars is imported inside the method, so it is only required
    when this method is called.

    Returns:
        A Polars DataFrame built from the dictionaries returned
        by `to_dicts()`.
    """
    import polars as pl

    records = self.to_dicts()
    return pl.from_dicts(records)

to_pretty_dicts(keys=None, formatter=get_and_format_num) #

Convert the object to a list of dictionaries with formatted values.

Default formatting rules:

  • If a name starts with "rel_" or is equal to "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100 and add "%".
  • Round other values to 3 significant digits.
  • If a value is less than 0.001 or is greater than or equal to 10_000_000, format it in exponential notation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num

Returns:

Type Description
list[dict[str, str]]

List of dictionaries with formatted values.

Source code in src/tea_tasting/utils.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def to_pretty_dicts(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
) -> list[dict[str, str]]:
    """Convert the object to a list of dictionaries with formatted values.

    Default formatting rules:

    - If a name starts with `"rel_"` or is equal to `"power"`, consider it
        a percentage value. Round percentage values to 2 significant digits,
        multiply by `100` and add `"%"`.
    - Round other values to 3 significant digits.
    - If a value is less than `0.001` or is greater than or equal to
        `10_000_000`, format it in exponential notation.
    - If a name ends with `"_ci"`, consider it a confidence interval.
        Look up the attributes `"{name}_lower"` and `"{name}_upper"`,
        and format the interval as `"[{lower_bound}, {upper_bound}]"`.

    Args:
        keys: Keys to convert. If a key is not defined in the dictionary
            it's assumed to be `None`. If `keys` is `None`,
            `self.default_keys` is used.
        formatter: Custom formatter function. It should accept a dictionary
            of metric result attributes and an attribute name, and return
            a formatted attribute value.

    Returns:
        List of dictionaries with formatted values.
    """
    columns = self.default_keys if keys is None else keys

    formatted = []
    for record in self.to_dicts():
        # One output dictionary per input dictionary, limited to `columns`.
        formatted.append(
            {column: formatter(record, column) for column in columns})
    return formatted

to_string(keys=None, formatter=get_and_format_num) #

Convert the object to a string.

Default formatting rules:

  • If a name starts with "rel_" or is equal to "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100 and add "%".
  • Round other values to 3 significant digits.
  • If a value is less than 0.001 or is greater than or equal to 10_000_000, format it in exponential notation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num

Returns:

Type Description
str

A table with results rendered as string.

Source code in src/tea_tasting/utils.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def to_string(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
) -> str:
    """Render the object as a plain-text table.

    The first line holds the keys; each following line holds the formatted
    values of one dictionary. Cells are right-aligned to the widest cell
    of their column and separated by a single space.

    Default formatting rules:

    - If a name starts with `"rel_"` or is equal to `"power"`, consider it
        a percentage value. Round percentage values to 2 significant digits,
        multiply by `100` and add `"%"`.
    - Round other values to 3 significant digits.
    - If a value is less than `0.001` or is greater than or equal to
        `10_000_000`, format it in exponential notation.
    - If a name ends with `"_ci"`, consider it a confidence interval.
        Look up the attributes `"{name}_lower"` and `"{name}_upper"`,
        and format the interval as `"[{lower_bound}, {upper_bound}]"`.

    Args:
        keys: Keys to convert. If a key is not defined in the dictionary
            it's assumed to be `None`. If `keys` is `None`,
            `self.default_keys` is used.
        formatter: Custom formatter function. It should accept a dictionary
            of metric result attributes and an attribute name, and return
            a formatted attribute value.

    Returns:
        A table with results rendered as string.
    """
    columns = self.default_keys if keys is None else keys

    # Format every cell first, so that column widths can be derived after.
    formatted_rows = [
        {column: formatter(record, column) for column in columns}
        for record in self.to_dicts()
    ]

    # A column is as wide as its header or its widest value, whichever is larger.
    widths = {
        column: max([len(column), *(len(row[column]) for row in formatted_rows)])
        for column in columns
    }

    lines = [" ".join(column.rjust(widths[column]) for column in columns)]
    for row in formatted_rows:
        lines.append(
            " ".join(row[column].rjust(widths[column]) for column in columns))
    return "\n".join(lines)

ExperimentResult #

Bases: UserDict[str, MetricResult], DictsReprMixin

Experiment result for a pair of variants.

to_arrow() #

Convert the object to a PyArrow Table.

Source code in src/tea_tasting/utils.py
247
248
249
def to_arrow(self) -> pa.Table:
    """Convert the object to a PyArrow Table.

    Returns:
        A PyArrow Table built from the dictionaries returned by `to_dicts()`.
    """
    records = self.to_dicts()
    return pa.Table.from_pylist(records)

to_dicts() #

Convert the result to a sequence of dictionaries.

Examples:

>>> import pprint
>>> import tea_tasting as tt

>>> experiment = tt.Experiment(
...     orders_per_user=tt.Mean("orders"),
...     revenue_per_user=tt.Mean("revenue"),
... )
>>> data = tt.make_users_data(seed=42)
>>> result = experiment.analyze(data)
>>> pprint.pprint(result.to_dicts())
({'control': 0.5304003954522986,
  'effect_size': 0.04269014577177832,
  'effect_size_ci_lower': -0.010800201598205515,
  'effect_size_ci_upper': 0.09618049314176216,
  'metric': 'orders_per_user',
  'pvalue': np.float64(0.11773177998716214),
  'rel_effect_size': 0.08048664016431273,
  'rel_effect_size_ci_lower': -0.019515294044061937,
  'rel_effect_size_ci_upper': 0.1906880061278886,
  'statistic': 1.5647028839586707,
  'treatment': 0.5730905412240769},
 {'control': 5.241028175976273,
  'effect_size': 0.4890831037404775,
  'effect_size_ci_lower': -0.13261881482742033,
  'effect_size_ci_upper': 1.1107850223083753,
  'metric': 'revenue_per_user',
  'pvalue': np.float64(0.1230698855425058),
  'rel_effect_size': 0.09331815958981626,
  'rel_effect_size_ci_lower': -0.02373770894855798,
  'rel_effect_size_ci_upper': 0.22440926894909308,
  'statistic': 1.5423440700784083,
  'treatment': 5.73011127971675})
Source code in src/tea_tasting/experiment.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def to_dicts(self) -> tuple[dict[str, Any], ...]:
    """Convert the result to a sequence of dictionaries.

    Each item of the result becomes one dictionary that starts with
    a `"metric"` key holding the item's key. Values that are dictionaries
    are used as is; other values are converted with their `_asdict()` method.

    Examples:
        ```pycon
        >>> import pprint
        >>> import tea_tasting as tt

        >>> experiment = tt.Experiment(
        ...     orders_per_user=tt.Mean("orders"),
        ...     revenue_per_user=tt.Mean("revenue"),
        ... )
        >>> data = tt.make_users_data(seed=42)
        >>> result = experiment.analyze(data)
        >>> pprint.pprint(result.to_dicts())
        ({'control': 0.5304003954522986,
          'effect_size': 0.04269014577177832,
          'effect_size_ci_lower': -0.010800201598205515,
          'effect_size_ci_upper': 0.09618049314176216,
          'metric': 'orders_per_user',
          'pvalue': np.float64(0.11773177998716214),
          'rel_effect_size': 0.08048664016431273,
          'rel_effect_size_ci_lower': -0.019515294044061937,
          'rel_effect_size_ci_upper': 0.1906880061278886,
          'statistic': 1.5647028839586707,
          'treatment': 0.5730905412240769},
         {'control': 5.241028175976273,
          'effect_size': 0.4890831037404775,
          'effect_size_ci_lower': -0.13261881482742033,
          'effect_size_ci_upper': 1.1107850223083753,
          'metric': 'revenue_per_user',
          'pvalue': np.float64(0.1230698855425058),
          'rel_effect_size': 0.09331815958981626,
          'rel_effect_size_ci_lower': -0.02373770894855798,
          'rel_effect_size_ci_upper': 0.22440926894909308,
          'statistic': 1.5423440700784083,
          'treatment': 5.73011127971675})

        ```
    """
    rows = []
    for name, metric_result in self.items():
        if isinstance(metric_result, dict):
            attrs = metric_result
        else:
            attrs = metric_result._asdict()
        rows.append({"metric": name} | attrs)
    return tuple(rows)

to_html(keys=None, formatter=get_and_format_num, *, indent=None) #

Convert the object to HTML.

Default formatting rules:

  • If a name starts with "rel_" or is equal to "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100 and add "%".
  • Round other values to 3 significant digits.
  • If a value is less than 0.001 or is greater than or equal to 10_000_000, format it in exponential notation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num
indent str | None

Whitespace to insert for each indentation level. If None, do not indent.

None

Returns:

Type Description
str

A table with results rendered as HTML.

Source code in src/tea_tasting/utils.py
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
def to_html(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
    *,
    indent: str | None = None,
) -> str:
    """Render the object as an HTML table.

    Default formatting rules:

    - If a name starts with `"rel_"` or is equal to `"power"`, consider it
        a percentage value. Round percentage values to 2 significant digits,
        multiply by `100` and add `"%"`.
    - Round other values to 3 significant digits.
    - If a value is less than `0.001` or is greater than or equal to
        `10_000_000`, format it in exponential notation.
    - If a name ends with `"_ci"`, consider it a confidence interval.
        Look up the attributes `"{name}_lower"` and `"{name}_upper"`,
        and format the interval as `"[{lower_bound}, {upper_bound}]"`.

    Args:
        keys: Keys to convert. If a key is not defined in the dictionary
            it's assumed to be `None`. If `keys` is `None`,
            `self.default_keys` is used.
        formatter: Custom formatter function. It should accept a dictionary
            of metric result attributes and an attribute name, and return
            a formatted attribute value.
        indent: Whitespace to insert for each indentation level. If `None`,
            do not indent.

    Returns:
        A table with results rendered as HTML.
    """
    columns = self.default_keys if keys is None else keys

    root = ET.Element(
        "table",
        {"class": "dataframe", "style": "text-align: right;"},
    )

    # Header: a single row with one cell per requested key.
    header_row = ET.SubElement(ET.SubElement(root, "thead"), "tr")
    for column in columns:
        ET.SubElement(header_row, "th").text = column

    # Body: one row per dictionary, each cell produced by the formatter.
    body = ET.SubElement(root, "tbody")
    for record in self.to_dicts():
        row = ET.SubElement(body, "tr")
        for column in columns:
            ET.SubElement(row, "td").text = formatter(record, column)

    if indent is not None:
        ET.indent(root, space=indent)
    return ET.tostring(root, encoding="unicode", method="html")

to_pandas() #

Convert the object to a Pandas DataFrame.

Source code in src/tea_tasting/utils.py
251
252
253
254
def to_pandas(self) -> PandasDataFrame:
    """Convert the object to a Pandas DataFrame.

    Pandas is imported inside the method, so it is only required
    when this method is called.

    Returns:
        A Pandas DataFrame built from the dictionaries returned
        by `to_dicts()`.
    """
    import pandas as pd

    records = self.to_dicts()
    return pd.DataFrame.from_records(records)

to_polars() #

Convert the object to a Polars DataFrame.

Source code in src/tea_tasting/utils.py
256
257
258
259
def to_polars(self) -> PolarsDataFrame:
    """Return the rows from `to_dicts()` as a Polars DataFrame."""
    from polars import from_dicts  # loaded only when this method is called
    return from_dicts(self.to_dicts())

to_pretty_dicts(keys=None, formatter=get_and_format_num) #

Convert the object to a list of dictionaries with formatted values.

Default formatting rules:

  • If a name starts with "rel_" or equals "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100, and add "%".
  • Round other values to 3 significant digits.
  • If a value is less than 0.001 or greater than or equal to 10_000_000, format it in exponential notation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num

Returns:

Type Description
list[dict[str, str]]

List of dictionaries with formatted values.

Source code in src/tea_tasting/utils.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def to_pretty_dicts(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
) -> list[dict[str, str]]:
    """Format the selected values of every row returned by `to_dicts()`.

    Default formatting rules:

    - A name that starts with `"rel_"` or equals `"power"` is treated as
        a percentage value: it is rounded to 2 significant digits,
        multiplied by `100`, and suffixed with `"%"`.
    - Other values are rounded to 3 significant digits.
    - A value less than `0.001`, or greater than or equal to `10_000_000`,
        is formatted in exponential notation.
    - A name that ends with `"_ci"` is treated as a confidence interval:
        the attributes `"{name}_lower"` and `"{name}_upper"` are looked up
        and the interval is formatted as `"[{lower_bound}, {upper_bound}]"`.

    Args:
        keys: Keys to convert, in order; `self.default_keys` if `None`.
            A key not defined in a row dictionary is assumed to be `None`.
        formatter: Custom formatter function. It is called with a dictionary
            of metric result attributes and an attribute name, and must
            return the formatted attribute value as a string.

    Returns:
        List with one dictionary of formatted values per row.
    """
    selected = self.default_keys if keys is None else keys
    rows = self.to_dicts()
    return [{name: formatter(row, name) for name in selected} for row in rows]

to_string(keys=None, formatter=get_and_format_num) #

Convert the object to a string.

Default formatting rules:

  • If a name starts with "rel_" or equals "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100, and add "%".
  • Round other values to 3 significant digits.
  • If a value is less than 0.001 or greater than or equal to 10_000_000, format it in exponential notation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num

Returns:

Type Description
str

A table with results rendered as a string.

Source code in src/tea_tasting/utils.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def to_string(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
) -> str:
    """Render the rows of `to_dicts()` as a right-aligned text table.

    Default formatting rules:

    - A name that starts with `"rel_"` or equals `"power"` is treated as
        a percentage value: it is rounded to 2 significant digits,
        multiplied by `100`, and suffixed with `"%"`.
    - Other values are rounded to 3 significant digits.
    - A value less than `0.001`, or greater than or equal to `10_000_000`,
        is formatted in exponential notation.
    - A name that ends with `"_ci"` is treated as a confidence interval:
        the attributes `"{name}_lower"` and `"{name}_upper"` are looked up
        and the interval is formatted as `"[{lower_bound}, {upper_bound}]"`.

    Args:
        keys: Keys to render as columns; `self.default_keys` if `None`.
            A key not defined in a row dictionary is assumed to be `None`.
        formatter: Custom formatter function. It is called with a dictionary
            of metric result attributes and an attribute name, and must
            return the formatted attribute value as a string.

    Returns:
        The table as one string: a header line, then one line per row.
    """
    columns = self.default_keys if keys is None else keys
    widths = dict(zip(columns, map(len, columns)))

    table = []
    for record in self.to_dicts():
        cells = {}
        for column in columns:
            text = formatter(record, column)
            cells[column] = text
            if len(text) > widths[column]:
                widths[column] = len(text)
        table.append(cells)

    # The header is rendered like any other row: right-aligned per column.
    header = dict(zip(columns, columns))
    lines = [
        " ".join(cells[column].rjust(widths[column]) for column in columns)
        for cells in (header, *table)
    ]
    return "\n".join(lines)

ExperimentResults #

Bases: UserDict[tuple[Any, Any], ExperimentResult], DictsReprMixin

Experiment results for multiple pairs of variants.

to_arrow() #

Convert the object to a PyArrow Table.

Source code in src/tea_tasting/utils.py
247
248
249
def to_arrow(self) -> pa.Table:
    """Return the rows from `to_dicts()` as a PyArrow Table."""
    records = self.to_dicts()
    return pa.Table.from_pylist(records)

to_dicts() #

Convert the result to a sequence of dictionaries.

Source code in src/tea_tasting/experiment.py
 98
 99
100
101
102
103
104
def to_dicts(self) -> tuple[dict[str, Any], ...]:
    """Flatten every experiment result into one tuple of row dictionaries.

    Each dictionary returned by an experiment result's `to_dicts()` is merged
    into a new dictionary that starts with a `"variants"` entry holding
    `str()` of the mapping key that result is stored under.
    """
    rows = []
    for variant_pair, result in self.items():
        for row in result.to_dicts():
            rows.append({"variants": str(variant_pair)} | row)
    return tuple(rows)

to_html(keys=None, formatter=get_and_format_num, *, indent=None) #

Convert the object to HTML.

Default formatting rules:

  • If a name starts with "rel_" or equals "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100, and add "%".
  • Round other values to 3 significant digits.
  • If a value is less than 0.001 or greater than or equal to 10_000_000, format it in exponential notation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num
indent str | None

Whitespace to insert for each indentation level. If None, do not indent.

None

Returns:

Type Description
str

A table with results rendered as HTML.

Source code in src/tea_tasting/utils.py
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
def to_html(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
    *,
    indent: str | None = None,
) -> str:
    """Render the rows of `to_dicts()` as an HTML table.

    Default formatting rules:

    - A name that starts with `"rel_"` or equals `"power"` is treated as
        a percentage value: it is rounded to 2 significant digits,
        multiplied by `100`, and suffixed with `"%"`.
    - Other values are rounded to 3 significant digits.
    - A value less than `0.001`, or greater than or equal to `10_000_000`,
        is formatted in exponential notation.
    - A name that ends with `"_ci"` is treated as a confidence interval:
        the attributes `"{name}_lower"` and `"{name}_upper"` are looked up
        and the interval is formatted as `"[{lower_bound}, {upper_bound}]"`.

    Args:
        keys: Keys to render as columns; `self.default_keys` if `None`.
            A key not defined in a row dictionary is assumed to be `None`.
        formatter: Custom formatter function. It is called with a dictionary
            of metric result attributes and an attribute name, and must
            return the formatted attribute value as a string.
        indent: Whitespace to insert for each indentation level. If `None`,
            the markup is not indented.

    Returns:
        A `<table>` element with the results, serialized as an HTML string.
    """
    columns = self.default_keys if keys is None else keys
    table = ET.Element("table")
    table.set("class", "dataframe")
    table.set("style", "text-align: right;")

    # Header: a single <tr> with one <th> per key.
    header_row = ET.SubElement(ET.SubElement(table, "thead"), "tr")
    for column in columns:
        ET.SubElement(header_row, "th").text = column

    # Body: one <tr> per row dictionary, one <td> per key.
    body = ET.SubElement(table, "tbody")
    for record in self.to_dicts():
        body_row = ET.SubElement(body, "tr")
        for column in columns:
            cell = ET.SubElement(body_row, "td")
            cell.text = formatter(record, column)
    if indent is not None:
        ET.indent(table, space=indent)
    return ET.tostring(table, encoding="unicode", method="html")

to_pandas() #

Convert the object to a Pandas DataFrame.

Source code in src/tea_tasting/utils.py
251
252
253
254
def to_pandas(self) -> PandasDataFrame:
    """Return the rows from `to_dicts()` as a Pandas DataFrame."""
    from pandas import DataFrame  # loaded only when this method is called
    return DataFrame.from_records(self.to_dicts())

to_polars() #

Convert the object to a Polars DataFrame.

Source code in src/tea_tasting/utils.py
256
257
258
259
def to_polars(self) -> PolarsDataFrame:
    """Return the rows from `to_dicts()` as a Polars DataFrame."""
    from polars import from_dicts  # loaded only when this method is called
    return from_dicts(self.to_dicts())

to_pretty_dicts(keys=None, formatter=get_and_format_num) #

Convert the object to a list of dictionaries with formatted values.

Default formatting rules:

  • If a name starts with "rel_" or equals "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100, and add "%".
  • Round other values to 3 significant digits.
  • If a value is less than 0.001 or greater than or equal to 10_000_000, format it in exponential notation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num

Returns:

Type Description
list[dict[str, str]]

List of dictionaries with formatted values.

Source code in src/tea_tasting/utils.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def to_pretty_dicts(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
) -> list[dict[str, str]]:
    """Format the selected values of every row returned by `to_dicts()`.

    Default formatting rules:

    - A name that starts with `"rel_"` or equals `"power"` is treated as
        a percentage value: it is rounded to 2 significant digits,
        multiplied by `100`, and suffixed with `"%"`.
    - Other values are rounded to 3 significant digits.
    - A value less than `0.001`, or greater than or equal to `10_000_000`,
        is formatted in exponential notation.
    - A name that ends with `"_ci"` is treated as a confidence interval:
        the attributes `"{name}_lower"` and `"{name}_upper"` are looked up
        and the interval is formatted as `"[{lower_bound}, {upper_bound}]"`.

    Args:
        keys: Keys to convert, in order; `self.default_keys` if `None`.
            A key not defined in a row dictionary is assumed to be `None`.
        formatter: Custom formatter function. It is called with a dictionary
            of metric result attributes and an attribute name, and must
            return the formatted attribute value as a string.

    Returns:
        List with one dictionary of formatted values per row.
    """
    selected = self.default_keys if keys is None else keys
    rows = self.to_dicts()
    return [{name: formatter(row, name) for name in selected} for row in rows]

to_string(keys=None, formatter=get_and_format_num) #

Convert the object to a string.

Default formatting rules:

  • If a name starts with "rel_" or equals "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100, and add "%".
  • Round other values to 3 significant digits.
  • If a value is less than 0.001 or greater than or equal to 10_000_000, format it in exponential notation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num

Returns:

Type Description
str

A table with results rendered as a string.

Source code in src/tea_tasting/utils.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def to_string(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
) -> str:
    """Render the rows of `to_dicts()` as a right-aligned text table.

    Default formatting rules:

    - A name that starts with `"rel_"` or equals `"power"` is treated as
        a percentage value: it is rounded to 2 significant digits,
        multiplied by `100`, and suffixed with `"%"`.
    - Other values are rounded to 3 significant digits.
    - A value less than `0.001`, or greater than or equal to `10_000_000`,
        is formatted in exponential notation.
    - A name that ends with `"_ci"` is treated as a confidence interval:
        the attributes `"{name}_lower"` and `"{name}_upper"` are looked up
        and the interval is formatted as `"[{lower_bound}, {upper_bound}]"`.

    Args:
        keys: Keys to render as columns; `self.default_keys` if `None`.
            A key not defined in a row dictionary is assumed to be `None`.
        formatter: Custom formatter function. It is called with a dictionary
            of metric result attributes and an attribute name, and must
            return the formatted attribute value as a string.

    Returns:
        The table as one string: a header line, then one line per row.
    """
    columns = self.default_keys if keys is None else keys
    widths = dict(zip(columns, map(len, columns)))

    table = []
    for record in self.to_dicts():
        cells = {}
        for column in columns:
            text = formatter(record, column)
            cells[column] = text
            if len(text) > widths[column]:
                widths[column] = len(text)
        table.append(cells)

    # The header is rendered like any other row: right-aligned per column.
    header = dict(zip(columns, columns))
    lines = [
        " ".join(cells[column].rjust(widths[column]) for column in columns)
        for cells in (header, *table)
    ]
    return "\n".join(lines)