Skip to content

tea_tasting.metrics.proportion #

Metrics for the analysis of proportions.

SampleRatio(ratio=1, *, method='auto', correction=True) #

Bases: MetricBaseAggregated[SampleRatioResult]

Metric for sample ratio mismatch check.

Parameters:

Name Type Description Default
ratio float | int | dict[Any, float | int]

Expected ratio of the number of observations in the treatment relative to the control.

1
method Literal['auto', 'binom', 'norm']

Statistical test used for calculation of p-value:

  • "auto": Apply exact binomial test if the total number of observations is < 1000; or normal approximation otherwise.
  • "binom": Apply exact binomial test.
  • "norm": Apply normal approximation of the binomial distribution.
'auto'
correction bool

If True, apply a continuity correction. This is used only with the normal approximation.

True

Examples:

>>> import tea_tasting as tt

>>> experiment = tt.Experiment(
...     sample_ratio=tt.SampleRatio(),
... )
>>> data = tt.make_users_data(seed=42)
>>> result = experiment.analyze(data)
>>> print(result.to_string(("metric", "control", "treatment", "pvalue")))
      metric control treatment pvalue
sample_ratio    2023      1977  0.477

Different expected ratio:

>>> experiment = tt.Experiment(
...     sample_ratio=tt.SampleRatio(0.5),
... )
>>> data = tt.make_users_data(seed=42)
>>> result = experiment.analyze(data)
>>> print(result.to_string(("metric", "control", "treatment", "pvalue")))
      metric control treatment    pvalue
sample_ratio    2023      1977 3.26e-103
Source code in src/tea_tasting/metrics/proportion.py
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def __init__(
    self,
    ratio: float | int | dict[Any, float | int] = 1,
    *,
    method: Literal["auto", "binom", "norm"] = "auto",
    correction: bool = True,
) -> None:
    """Create a metric that checks for a sample ratio mismatch.

    Args:
        ratio: Expected number of observations in the treatment divided by
            the expected number in the control. Either a single number, or
            a mapping from variant to a number; with a mapping, the expected
            ratio is the treatment's value divided by the control's value.
        method: Statistical test that produces the p-value:

            - `"auto"`: Exact binomial test when the total number
                of observations is < 1000; normal approximation otherwise.
            - `"binom"`: Exact binomial test.
            - `"norm"`: Normal approximation of the binomial distribution.

        correction: If `True`, apply a continuity correction.
            Used only with the normal approximation.

    Examples:
        ```pycon
        >>> import tea_tasting as tt

        >>> experiment = tt.Experiment(
        ...     sample_ratio=tt.SampleRatio(),
        ... )
        >>> data = tt.make_users_data(seed=42)
        >>> result = experiment.analyze(data)
        >>> print(result.to_string(("metric", "control", "treatment", "pvalue")))
              metric control treatment pvalue
        sample_ratio    2023      1977  0.477

        ```

        Different expected ratio:

        ```pycon
        >>> experiment = tt.Experiment(
        ...     sample_ratio=tt.SampleRatio(0.5),
        ... )
        >>> data = tt.make_users_data(seed=42)
        >>> result = experiment.analyze(data)
        >>> print(result.to_string(("metric", "control", "treatment", "pvalue")))
              metric control treatment    pvalue
        sample_ratio    2023      1977 3.26e-103

        ```
    """
    # Validate every number that makes up the expected ratio: each value of
    # a mapping, or the single scalar itself.
    values_to_check = ratio.values() if isinstance(ratio, dict) else (ratio,)
    for ratio_value in values_to_check:
        tea_tasting.utils.auto_check(ratio_value, "ratio")
    self.ratio = ratio

    allowed_methods = {"auto", "binom", "norm"}
    self.method = tea_tasting.utils.check_scalar(
        method,
        "method",
        typ=str,
        in_=allowed_methods,
    )
    self.correction = tea_tasting.utils.auto_check(correction, "correction")

aggr_cols: AggrCols property #

Columns to be aggregated for a metric analysis.

analyze(data, control, treatment, variant=None) #

Perform a sample ratio mismatch check.

Parameters:

Name Type Description Default
data IntoFrame | Table | dict[Any, Aggregates]

Experimental data.

required
control Any

Control variant.

required
treatment Any

Treatment variant.

required
variant str | None

Variant column name.

None

Returns:

Type Description
SampleRatioResult

Analysis result.

Source code in src/tea_tasting/metrics/proportion.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def analyze(
    self,
    data: narwhals.typing.IntoFrame | ibis.expr.types.Table | dict[
        Any, tea_tasting.aggr.Aggregates],
    control: Any,
    treatment: Any,
    variant: str | None = None,
) -> SampleRatioResult:
    """Check whether the observed variant sizes match the expected ratio.

    Args:
        data: Experimental data.
        control: Control variant.
        treatment: Treatment variant.
        variant: Variant column name.

    Returns:
        Analysis result: the observation counts in control and treatment,
        and the p-value of the test.
    """
    by_variant = tea_tasting.metrics.aggregate_by_variants(
        data,
        aggr_cols=self.aggr_cols,
        variant=variant,
    )

    n_treatment = by_variant[treatment].count()
    n_total = n_treatment + by_variant[control].count()

    # Expected treatment-to-control ratio: either the scalar itself, or
    # derived from the per-variant values of a mapping.
    if isinstance(self.ratio, float | int):
        expected_ratio = self.ratio
    else:
        expected_ratio = self.ratio[treatment] / self.ratio[control]
    # Expected share of the treatment among all observations.
    expected_share = expected_ratio / (1 + expected_ratio)

    use_exact_test = self.method == "binom" or (
        self.method == "auto" and n_total < _MAX_EXACT_THRESHOLD
    )

    if use_exact_test:
        test_result = scipy.stats.binomtest(
            k=int(n_treatment),
            n=int(n_total),
            p=expected_share,
        )
        pvalue = test_result.pvalue
    else:  # norm
        # Difference between observed and expected treatment count.
        deviation = n_treatment - n_total * expected_share
        if self.correction and deviation != 0:
            # Continuity correction: shrink the deviation towards zero
            # by 0.5 without letting it cross zero.
            if deviation < 0:
                deviation = min(deviation + 0.5, 0)
            else:
                deviation = max(deviation - 0.5, 0)
        std_dev = math.sqrt(n_total * expected_share * (1 - expected_share))
        z_score = deviation / std_dev
        # Two-sided p-value.
        pvalue = 2 * scipy.stats.norm.sf(abs(z_score))

    return SampleRatioResult(
        control=n_total - n_treatment,
        treatment=n_treatment,
        pvalue=pvalue,  # type: ignore
    )

analyze_aggregates(control, treatment) #

Stub method for compatibility with the base class.

Source code in src/tea_tasting/metrics/proportion.py
164
165
166
167
168
169
170
def analyze_aggregates(
    self,
    control: tea_tasting.aggr.Aggregates,
    treatment: tea_tasting.aggr.Aggregates,
) -> SampleRatioResult:
    """Stub method for compatibility with the base class.

    This method is not implemented for the sample ratio check;
    use `analyze` to perform the check.

    Args:
        control: Control aggregates. Not used.
        treatment: Treatment aggregates. Not used.

    Raises:
        NotImplementedError: Always.
    """
    # Carry an explicit message so a caller who reaches this stub learns
    # which method to call instead of seeing a bare exception.
    raise NotImplementedError(
        f"{type(self).__name__} does not implement analyze_aggregates; "
        "use analyze instead."
    )

SampleRatioResult #

Bases: NamedTuple

Result of the sample ratio mismatch check.

Attributes:

Name Type Description
control float

Number of observations in control.

treatment float

Number of observations in treatment.

pvalue float

P-value of the sample ratio mismatch check.