Skip to content

tea_tasting.metrics.base #

Base classes for metrics.

AggrCols #

Bases: NamedTuple

Columns to be aggregated for a metric analysis.

Attributes:

Name Type Description
has_count bool

If True, include the sample size.

mean_cols Sequence[str]

Column names for calculation of sample means.

var_cols Sequence[str]

Column names for calculation of sample variances.

cov_cols Sequence[tuple[str, str]]

Pairs of column names for calculation of sample covariances.

MetricBase #

Bases: ABC, Generic[R], ReprMixin

Base class for metrics.

analyze(data, control, treatment, variant) abstractmethod #

Analyze a metric in an experiment.

Parameters:

Name Type Description Default
data DataFrame | Table

Experimental data.

required
control Any

Control variant.

required
treatment Any

Treatment variant.

required
variant str

Variant column name.

required

Returns:

Type Description
R

Analysis result.

Source code in src/tea_tasting/metrics/base.py
@abc.abstractmethod
def analyze(
    self,
    data: pd.DataFrame | ibis.expr.types.Table,
    control: Any,
    treatment: Any,
    variant: str,
) -> R:
    """Analyze a metric in an experiment.

    This is an abstract method: it has no implementation here
    and is meant to be overridden by concrete metric classes.

    Args:
        data: Experimental data.
        control: Control variant.
        treatment: Treatment variant.
        variant: Variant column name.

    Returns:
        Analysis result.
    """

MetricBaseAggregated #

Bases: MetricBase[R], _HasAggrCols

Base class for metrics, which are analyzed using aggregated statistics.

aggr_cols: AggrCols abstractmethod property #

Columns to be aggregated for an analysis.

analyze(data, control, treatment, variant=None) #

Analyze a metric in an experiment.

Parameters:

Name Type Description Default
data DataFrame | Table | dict[Any, Aggregates]

Experimental data.

required
control Any

Control variant.

required
treatment Any

Treatment variant.

required
variant str | None

Variant column name.

None

Returns:

Type Description
R

Analysis result.

Source code in src/tea_tasting/metrics/base.py
def analyze(
    self,
    data: pd.DataFrame | ibis.expr.types.Table | dict[
        Any, tea_tasting.aggr.Aggregates],
    control: Any,
    treatment: Any,
    variant: str | None = None,
) -> R:
    """Analyze a metric in an experiment using aggregated statistics.

    The data is aggregated by variants for the columns in `aggr_cols`,
    then the aggregates of the control and treatment variants
    are passed to `analyze_aggregates`.

    Args:
        data: Experimental data, or a dictionary of already
            computed Aggregates keyed by variant.
        control: Control variant.
        treatment: Treatment variant.
        variant: Variant column name.

    Returns:
        Analysis result.
    """
    stats_by_variant = aggregate_by_variants(
        data,
        aggr_cols=self.aggr_cols,
        variant=variant,
    )
    control_stats = stats_by_variant[control]
    treatment_stats = stats_by_variant[treatment]
    return self.analyze_aggregates(
        control=control_stats,
        treatment=treatment_stats,
    )

analyze_aggregates(control, treatment) abstractmethod #

Analyze metric in an experiment using aggregated statistics.

Parameters:

Name Type Description Default
control Aggregates

Control data.

required
treatment Aggregates

Treatment data.

required

Returns:

Type Description
R

Analysis result.

Source code in src/tea_tasting/metrics/base.py
@abc.abstractmethod
def analyze_aggregates(
    self,
    control: tea_tasting.aggr.Aggregates,
    treatment: tea_tasting.aggr.Aggregates,
) -> R:
    """Analyze a metric in an experiment using aggregated statistics.

    This is an abstract method: it has no implementation here
    and is meant to be overridden by concrete metric classes.

    Args:
        control: Control data.
        treatment: Treatment data.

    Returns:
        Analysis result.
    """

MetricBaseGranular #

Bases: MetricBase[R], _HasCols

Base class for metrics, which are analyzed using granular data.

cols: Sequence[str] abstractmethod property #

Columns to be fetched for an analysis.

analyze(data, control, treatment, variant=None) #

Analyze a metric in an experiment.

Parameters:

Name Type Description Default
data DataFrame | Table | dict[Any, DataFrame]

Experimental data.

required
control Any

Control variant.

required
treatment Any

Treatment variant.

required
variant str | None

Variant column name.

None

Returns:

Type Description
R

Analysis result.

Source code in src/tea_tasting/metrics/base.py
def analyze(
    self,
    data: pd.DataFrame | ibis.expr.types.Table | dict[Any, pd.DataFrame],
    control: Any,
    treatment: Any,
    variant: str | None = None,
) -> R:
    """Analyze a metric in an experiment using granular data.

    The columns listed in `cols` are read and split by variants,
    then the DataFrames of the control and treatment variants
    are passed to `analyze_dataframes`.

    Args:
        data: Experimental data, or a dictionary of DataFrames
            keyed by variant.
        control: Control variant.
        treatment: Treatment variant.
        variant: Variant column name.

    Returns:
        Analysis result.
    """
    frames_by_variant = read_dataframes(
        data,
        cols=self.cols,
        variant=variant,
    )
    control_frame = frames_by_variant[control]
    treatment_frame = frames_by_variant[treatment]
    return self.analyze_dataframes(
        control=control_frame,
        treatment=treatment_frame,
    )

analyze_dataframes(control, treatment) abstractmethod #

Analyze metric in an experiment using granular data.

Parameters:

Name Type Description Default
control DataFrame

Control data.

required
treatment DataFrame

Treatment data.

required

Returns:

Type Description
R

Analysis result.

Source code in src/tea_tasting/metrics/base.py
@abc.abstractmethod
def analyze_dataframes(
    self,
    control: pd.DataFrame,
    treatment: pd.DataFrame,
) -> R:
    """Analyze a metric in an experiment using granular data.

    This is an abstract method: it has no implementation here
    and is meant to be overridden by concrete metric classes.

    Args:
        control: Control data.
        treatment: Treatment data.

    Returns:
        Analysis result.
    """

MetricPowerResults #

Bases: UserList[P], PrettyDictsMixin

Power analysis results.

to_dicts() #

Convert the results to a sequence of dictionaries.

Source code in src/tea_tasting/metrics/base.py
def to_dicts(self) -> tuple[dict[str, Any], ...]:
    """Convert the results to a sequence of dictionaries.

    Items that are already dictionaries are kept as is;
    any other item is converted with its `_asdict()` method.

    Returns:
        A tuple with one dictionary per result, in the original order.
    """
    return tuple((v if isinstance(v, dict) else v._asdict()) for v in self)

to_html(keys=None, formatter=get_and_format_num) #

Convert the object to HTML.

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num

Returns:

Type Description
str

A table with results rendered as HTML.

Default formatting rules
  • If a name starts with "rel_" or equals "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100 and add "%".
  • Round other values to 3 significant digits.
  • If value is less than 0.001, format it in exponential presentation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".
Source code in src/tea_tasting/utils.py
def to_html(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
) -> str:
    """Render the object as an HTML table.

    The values are formatted with `to_pretty` first, and the resulting
    DataFrame is rendered without its index.

    Args:
        keys: Keys to convert. If a key is not defined in the dictionary
            it's assumed to be `None`.
        formatter: Custom formatter function. It should accept a dictionary
            of metric result attributes and an attribute name, and return
            a formatted attribute value.

    Returns:
        A table with results rendered as HTML.

    Default formatting rules:
        - If a name starts with `"rel_"` or equals `"power"`, consider it
            a percentage value. Round percentage values to 2 significant digits,
            multiply by `100` and add `"%"`.
        - Round other values to 3 significant digits.
        - If value is less than `0.001`, format it in exponential presentation.
        - If a name ends with `"_ci"`, consider it a confidence interval.
            Look up the attributes `"{name}_lower"` and `"{name}_upper"`,
            and format the interval as `"[{lower_bound}, {upper_bound}]"`.
    """
    pretty_frame = self.to_pretty(keys, formatter)
    return pretty_frame.to_html(index=False)

to_pandas() #

Convert the object to a Pandas DataFrame.

Source code in src/tea_tasting/utils.py
def to_pandas(self) -> pd.DataFrame:
    """Convert the object to a Pandas DataFrame.

    Returns:
        A DataFrame built from the records returned by `to_dicts`.
    """
    records = self.to_dicts()
    return pd.DataFrame.from_records(records)

to_pretty(keys=None, formatter=get_and_format_num) #

Convert the object to a Pandas Dataframe with formatted values.

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num

Returns:

Type Description
DataFrame

Pandas Dataframe with formatted values.

Default formatting rules
  • If a name starts with "rel_" or equals "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100 and add "%".
  • Round other values to 3 significant digits.
  • If value is less than 0.001, format it in exponential presentation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".
Source code in src/tea_tasting/utils.py
def to_pretty(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
) -> pd.DataFrame:
    """Convert the object to a Pandas DataFrame with formatted values.

    Every dictionary returned by `to_dicts` becomes one row; each requested
    key becomes a column whose value is produced by `formatter`.

    Args:
        keys: Keys to convert. If a key is not defined in the dictionary
            it's assumed to be `None`. Defaults to `self.default_keys`.
        formatter: Custom formatter function. It should accept a dictionary
            of metric result attributes and an attribute name, and return
            a formatted attribute value.

    Returns:
        Pandas DataFrame with formatted values.

    Default formatting rules:
        - If a name starts with `"rel_"` or equals `"power"`, consider it
            a percentage value. Round percentage values to 2 significant digits,
            multiply by `100` and add `"%"`.
        - Round other values to 3 significant digits.
        - If value is less than `0.001`, format it in exponential presentation.
        - If a name ends with `"_ci"`, consider it a confidence interval.
            Look up the attributes `"{name}_lower"` and `"{name}_upper"`,
            and format the interval as `"[{lower_bound}, {upper_bound}]"`.
    """
    selected_keys = self.default_keys if keys is None else keys
    formatted_rows = (
        {key: formatter(row, key) for key in selected_keys}
        for row in self.to_dicts()
    )
    return pd.DataFrame.from_records(formatted_rows)

to_string(keys=None, formatter=get_and_format_num) #

Convert the object to a string.

Parameters:

Name Type Description Default
keys Sequence[str] | None

Keys to convert. If a key is not defined in the dictionary it's assumed to be None.

None
formatter Callable[[dict[str, Any], str], str]

Custom formatter function. It should accept a dictionary of metric result attributes and an attribute name, and return a formatted attribute value.

get_and_format_num

Returns:

Type Description
str

A table with results rendered as string.

Default formatting rules
  • If a name starts with "rel_" or equals "power", consider it a percentage value. Round percentage values to 2 significant digits, multiply by 100 and add "%".
  • Round other values to 3 significant digits.
  • If value is less than 0.001, format it in exponential presentation.
  • If a name ends with "_ci", consider it a confidence interval. Look up the attributes "{name}_lower" and "{name}_upper", and format the interval as "[{lower_bound}, {upper_bound}]".
Source code in src/tea_tasting/utils.py
def to_string(
    self,
    keys: Sequence[str] | None = None,
    formatter: Callable[[dict[str, Any], str], str] = get_and_format_num,
) -> str:
    """Render the object as a plain-text table.

    The values are formatted with `to_pretty` first, and the resulting
    DataFrame is rendered without its index.

    Args:
        keys: Keys to convert. If a key is not defined in the dictionary
            it's assumed to be `None`.
        formatter: Custom formatter function. It should accept a dictionary
            of metric result attributes and an attribute name, and return
            a formatted attribute value.

    Returns:
        A table with results rendered as string.

    Default formatting rules:
        - If a name starts with `"rel_"` or equals `"power"`, consider it
            a percentage value. Round percentage values to 2 significant digits,
            multiply by `100` and add `"%"`.
        - Round other values to 3 significant digits.
        - If value is less than `0.001`, format it in exponential presentation.
        - If a name ends with `"_ci"`, consider it a confidence interval.
            Look up the attributes `"{name}_lower"` and `"{name}_upper"`,
            and format the interval as `"[{lower_bound}, {upper_bound}]"`.
    """
    pretty_frame = self.to_pretty(keys, formatter)
    return pretty_frame.to_string(index=False)

PowerBase #

Bases: ABC, Generic[S], ReprMixin

Base class for the analysis of power.

solve_power(data, parameter='rel_effect_size') abstractmethod #

Solve for a parameter of the power of a test.

Parameters:

Name Type Description Default
data DataFrame | Table

Sample data.

required
parameter Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']

Parameter name.

'rel_effect_size'

Returns:

Type Description
S

Power analysis result.

Source code in src/tea_tasting/metrics/base.py
@abc.abstractmethod
def solve_power(
    self,
    data: pd.DataFrame | ibis.expr.types.Table,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> S:
    """Solve for a parameter of the power of a test.

    This is an abstract method: it has no implementation here
    and is meant to be overridden by concrete power analysis classes.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """

PowerBaseAggregated #

Bases: PowerBase[S], _HasAggrCols

Base class for the analysis of power using aggregated statistics.

aggr_cols: AggrCols abstractmethod property #

Columns to be aggregated for an analysis.

solve_power(data, parameter='rel_effect_size') #

Solve for a parameter of the power of a test.

Parameters:

Name Type Description Default
data DataFrame | Table | Aggregates

Sample data.

required
parameter Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']

Parameter name.

'rel_effect_size'

Returns:

Type Description
S

Power analysis result.

Source code in src/tea_tasting/metrics/base.py
def solve_power(
    self,
    data: pd.DataFrame | ibis.expr.types.Table | tea_tasting.aggr.Aggregates,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> S:
    """Solve for a parameter of the power of a test.

    Data that is not already an instance of Aggregates is aggregated
    (without grouping) for the columns in `aggr_cols`; the aggregates
    are then passed to `solve_power_from_aggregates`.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """
    if isinstance(data, tea_tasting.aggr.Aggregates):
        aggregates = data
    else:
        aggregates = tea_tasting.aggr.read_aggregates(
            data=data,
            group_col=None,
            **self.aggr_cols._asdict(),
        )
    return self.solve_power_from_aggregates(data=aggregates, parameter=parameter)

solve_power_from_aggregates(data, parameter='rel_effect_size') abstractmethod #

Solve for a parameter of the power of a test.

Parameters:

Name Type Description Default
data Aggregates

Sample data.

required
parameter Literal['power', 'effect_size', 'rel_effect_size', 'n_obs']

Parameter name.

'rel_effect_size'

Returns:

Type Description
S

Power analysis result.

Source code in src/tea_tasting/metrics/base.py
@abc.abstractmethod
def solve_power_from_aggregates(
    self,
    data: tea_tasting.aggr.Aggregates,
    parameter: Literal[
        "power", "effect_size", "rel_effect_size", "n_obs"] = "rel_effect_size",
) -> S:
    """Solve for a parameter of the power of a test using aggregated statistics.

    This is an abstract method: it has no implementation here
    and is meant to be overridden by concrete power analysis classes.

    Args:
        data: Sample data.
        parameter: Parameter name.

    Returns:
        Power analysis result.
    """

aggregate_by_variants(data, aggr_cols, variant=None) #

Aggregate experimental data by variants.

Parameters:

Name Type Description Default
data DataFrame | Table | dict[Any, Aggregates]

Experimental data.

required
aggr_cols AggrCols

Columns to be aggregated.

required
variant str | None

Variant column name.

None

Raises:

Type Description
ValueError

The variant parameter is required but was not provided.

TypeError

data is not an instance of DataFrame, Table, or a dictionary of Aggregates.

Returns:

Type Description
dict[Any, Aggregates]

Experimental data as a dictionary of Aggregates.

Source code in src/tea_tasting/metrics/base.py
def aggregate_by_variants(
    data: pd.DataFrame | ibis.expr.types.Table | dict[Any, tea_tasting.aggr.Aggregates],
    aggr_cols: AggrCols,
    variant: str | None = None,
) -> dict[Any, tea_tasting.aggr.Aggregates]:
    """Aggregate experimental data by variants.

    A dictionary whose values are all Aggregates is returned unchanged.
    Otherwise the data is aggregated with `variant` as the grouping column.

    Args:
        data: Experimental data.
        aggr_cols: Columns to be aggregated.
        variant: Variant column name.

    Raises:
        ValueError: The variant parameter is required but was not provided.
        TypeError: data is not an instance of DataFrame, Table,
            or a dictionary of Aggregates.

    Returns:
        Experimental data as a dictionary of Aggregates.
    """
    # Already aggregated: nothing to compute, and no variant column is needed.
    if isinstance(data, dict) and all(
        isinstance(value, tea_tasting.aggr.Aggregates)  # type: ignore
        for value in data.values()
    ):
        return data

    # The variant check deliberately precedes the type check,
    # so a missing variant is reported first.
    if variant is None:
        raise ValueError("The variant parameter is required but was not provided.")

    if isinstance(data, pd.DataFrame | ibis.expr.types.Table):
        return tea_tasting.aggr.read_aggregates(
            data=data,
            group_col=variant,
            **aggr_cols._asdict(),
        )

    raise TypeError(
        f"data is a {type(data)}, but must be an instance of"
        " DataFrame, Table, or a dictionary of Aggregates.",
    )

read_dataframes(data, cols, variant=None) #

Read granular experimental data.

Parameters:

Name Type Description Default
data DataFrame | Table | dict[Any, DataFrame]

Experimental data.

required
cols Sequence[str]

Columns to read.

required
variant str | None

Variant column name.

None

Raises:

Type Description
ValueError

The variant parameter is required but was not provided.

TypeError

data is not an instance of DataFrame, Table, or a dictionary of DataFrames.

Returns:

Type Description
dict[Any, DataFrame]

Experimental data as a dictionary of DataFrames.

Source code in src/tea_tasting/metrics/base.py
def read_dataframes(
    data: pd.DataFrame | ibis.expr.types.Table | dict[Any, pd.DataFrame],
    cols: Sequence[str],
    variant: str | None = None,
) -> dict[Any, pd.DataFrame]:
    """Read granular experimental data.

    A dictionary whose values are all Pandas DataFrames is returned unchanged.
    Otherwise the columns `cols` and the `variant` column are selected
    (an Ibis Table is converted to a Pandas DataFrame first),
    and the rows are split by the values of the `variant` column.

    Args:
        data: Experimental data.
        cols: Columns to read.
        variant: Variant column name.

    Raises:
        ValueError: The variant parameter is required but was not provided.
        TypeError: data is not an instance of DataFrame, Table,
            or a dictionary of DataFrames.

    Returns:
        Experimental data as a dictionary of DataFrames.
    """
    if isinstance(data, dict) and all(
        isinstance(v, pd.DataFrame) for v in data.values()  # type: ignore
    ):
        return data

    if variant is None:
        raise ValueError("The variant parameter is required but was not provided.")

    # Only non-DataFrame inputs need the Table check and conversion.
    if not isinstance(data, pd.DataFrame):
        if not isinstance(data, ibis.expr.types.Table):
            raise TypeError(
                f"data is a {type(data)}, but must be an instance of"
                " DataFrame, Table, or a dictionary of DataFrames.",
            )
        data = data.select(*cols, variant).to_pandas()

    grouped = data.loc[:, [*cols, variant]].groupby(variant)
    # NOTE(review): tuple() is presumably required because the GroupBy object
    # exposes a `keys` attribute, which would make dict() treat it as a mapping
    # instead of an iterable of (key, frame) pairs; verify before simplifying.
    return dict(tuple(grouped))