Skip to content

Model

Models for the joint distribution of weekly calendar data.

# Build a latent calendar model with three components and a fixed seed.
model = LatentCalendar(n_components=3, random_state=42)

# The model is fit on the NumPy array taken from `df_wide`.
X = df_wide.to_numpy()
model.fit(X)

# `transform` gives the latent representation of each row;
# `predict` gives the marginal probability of each time slot for each row.
X_latent = model.transform(X)
X_pred = model.predict(X)

ConjugateModel

Bases: BaseEstimator, TransformerMixin

Conjugate model for the calendar joint distribution.

This is a wrapper around the conjugate model for the multinomial distribution, i.e. a Dirichlet prior over the time slots.

This doesn't use dimensionality reduction, but it does use the conjugate model.

Parameters:

Name Type Description Default
a ndarray | None

(n_times,) prior for each hour of the day. If None, then the prior is the average of the data.

None
Source code in latent_calendar/model/latent_calendar.py
class ConjugateModel(BaseEstimator, TransformerMixin):
    """Conjugate model for the calendar joint distribution.

    A Dirichlet prior over the time slots is combined with the rows of X
    through ``multinomial_dirichlet``. No dimensionality reduction is
    involved.

    Args:
        a: (n_times,) prior for each hour of the day. If None, the prior
            is derived from the training data with ``hourly_prior`` each
            time ``fit`` is called.

    Attributes:
        prior_: Dirichlet prior built by ``fit``.

    """

    def __init__(self, a: np.ndarray | None = None) -> None:
        self.a = a

    def fit(self, X, y=None) -> "ConjugateModel":
        """Fit the conjugate model.

        Args:
            X: (nrows, n_times) training data. Only used to derive the
                prior when ``a`` was not provided.
            y: ignored.

        Returns:
            The fitted model.

        """
        # Leave the constructor argument untouched: overwriting `self.a`
        # would make a later refit on different data silently reuse the
        # prior derived from the first dataset.
        alpha = hourly_prior(X) if self.a is None else self.a

        self.prior_ = Dirichlet(alpha=alpha)
        return self

    def transform(self, X, y=None) -> np.ndarray:
        """Return the mean of the distribution built from X and the fitted prior.

        Requires ``fit`` to have been called so that ``prior_`` exists.
        """
        return multinomial_dirichlet(x=X, prior=self.prior_).dist.mean()

    def predict(self, X, y=None) -> np.ndarray:
        """Alias of ``transform``."""
        return self.transform(X, y=y)

fit(X, y=None)

Fit the conjugate model.

Source code in latent_calendar/model/latent_calendar.py
def fit(self, X, y=None) -> "ConjugateModel":
    """Fit the conjugate model.

    Args:
        X: (nrows, n_times) training data. Only used to derive the prior
            when ``self.a`` is None.
        y: ignored.

    Returns:
        The fitted model, with the Dirichlet prior stored in ``prior_``.

    """
    # Leave the constructor argument untouched: overwriting `self.a` would
    # make a later refit on different data silently reuse the prior derived
    # from the first dataset.
    alpha = hourly_prior(X) if self.a is None else self.a

    self.prior_ = Dirichlet(alpha=alpha)
    return self

DummyModel

Bases: LatentCalendar

Return even probability of a latent.

This can be used as the worst possible baseline.

Source code in latent_calendar/model/latent_calendar.py
class DummyModel(LatentCalendar):
    """Return even probability of a latent.

    This can be used as the worst possible baseline.

    """

    # Width used when no data is available to infer it from.
    # NOTE(review): assumes a weekly calendar with hourly slots (7 * 24);
    # confirm against the package's default vocabulary.
    DEFAULT_TIME_SLOTS = 7 * 24

    def fit(self, X, y=None) -> "DummyModel":
        """Set a single component that is uniform over every time slot.

        Args:
            X: (nrows, n_time_slots) data; only the number of columns is
                used. May be None (as passed by `create`), in which case
                `DEFAULT_TIME_SLOTS` columns are assumed.
            y: ignored.

        Returns:
            The fitted model.

        """
        n_time_slots = self.DEFAULT_TIME_SLOTS if X is None else X.shape[1]

        self.n_components = 1
        self.components_ = np.full(
            (self.n_components, n_time_slots), 1 / n_time_slots
        )

        return self

    def transform(self, X, y=None) -> np.ndarray:
        """Everyone has equal probability of being in each group."""
        nrows = len(X)

        return np.ones((nrows, self.n_components)) / self.n_components

    @classmethod
    def create(cls) -> "DummyModel":
        """Return a dummy model ready for transforming and predicting."""
        model = cls()
        # No data is needed: `fit` falls back to the default width.
        model.fit(X=None)

        return model

    @classmethod
    def from_prior(cls, prior: np.ndarray | pd.Series) -> "DummyModel":
        """Return a dummy model from a prior.

        Args:
            prior: prior probability weights over time slots. Can be a numpy
                array of shape (n_time_slots,) or a segment Series (e.g. from
                `create_box_segment`) with a FULL_VOCAB-compatible index.

        Returns:
            DummyModel with a single component defined by the prior.

        Example:
            Build a model that concentrates on weekday mornings:

            ```python
            from latent_calendar import DummyModel
            from latent_calendar.segments import create_box_segment

            mornings = create_box_segment(
                day_start=0, day_end=5, hour_start=7, hour_end=10,
                name="Weekday mornings",
            )
            model = DummyModel.from_prior(mornings)
            sampler = model.create_sampler(random_state=0)
            df_weights, df_events = sampler.sample(n_samples=[20, 30, 15])
            ```

        """
        if isinstance(prior, pd.Series):
            prior = prior.to_numpy()

        model = cls()
        # A single row: the prior is the only component.
        model.components_ = prior[np.newaxis, :]
        model.n_components = 1

        return model

    @classmethod
    def from_segments(
        cls,
        df_segments: "pd.DataFrame",
        weights: "np.ndarray | list[float] | None" = None,
    ) -> "DummyModel":
        """Return a multi-component model where each segment is one component.

        Each row of `df_segments` becomes one component in the model. The
        population-level mixture over components is derived from
        `component_distribution_` — by default this weights components
        proportionally to the number of active slots in each segment. Pass
        explicit `weights` to override this.

        Args:
            df_segments: segments DataFrame in wide format, shape
                (n_segments, n_time_slots), e.g. from `stack_segments`.
            weights: optional 1-D array of length n_segments. Scales each
                component's contribution to the population prior. If None,
                weighting is proportional to active slot count per segment.

        Returns:
            DummyModel with one component per segment.

        Raises:
            ValueError: if `weights` is not 1-D with one entry per segment.

        Example:
            ```python
            from latent_calendar import DummyModel
            from latent_calendar.segments import create_box_segment, stack_segments

            mornings = create_box_segment(
                day_start=0, day_end=5, hour_start=7, hour_end=10, name="Mornings"
            )
            evenings = create_box_segment(
                day_start=0, day_end=5, hour_start=18, hour_end=22, name="Evenings"
            )
            df_segments = stack_segments([mornings, evenings])

            # Default weighting: proportional to active slots per segment
            model = DummyModel.from_segments(df_segments)

            # Scale the morning segment 3x relative to the evening segment
            model = DummyModel.from_segments(df_segments, weights=[3, 1])

            sampler = model.create_sampler(random_state=0)
            df_weights, df_events = sampler.sample(n_samples=[10, 20, 15])
            ```

        """
        components = df_segments.to_numpy().astype(float)

        if weights is not None:
            weights = np.asarray(weights, dtype=float)
            # Without this check a length-1 `weights` would broadcast over
            # every segment instead of being reported as a mistake.
            if weights.shape != (components.shape[0],):
                raise ValueError(
                    "weights must be 1-D with one entry per segment: "
                    f"expected shape ({components.shape[0]},), got {weights.shape}"
                )
            components = components * weights[:, np.newaxis]

        model = cls()
        model.components_ = components
        model.n_components = len(df_segments)

        return model

create() classmethod

Return a dummy model ready for transforming and predicting.

Source code in latent_calendar/model/latent_calendar.py
@classmethod
def create(cls) -> "DummyModel":
    """Return a dummy model ready for transforming and predicting.

    `fit` only reads the number of columns of its input, so an empty
    placeholder with the default width is passed instead of None, which
    has no `shape` and made this constructor fail.
    """
    # NOTE(review): 7 * 24 assumes a weekly calendar with hourly slots;
    # confirm against the package's default vocabulary.
    placeholder = np.empty((0, 7 * 24))

    model = cls()
    model.fit(X=placeholder)

    return model

fit(X, y=None)

All components are equal probability of every hour.

Source code in latent_calendar/model/latent_calendar.py
def fit(self, X, y=None) -> "DummyModel":
    """Set a single component that is uniform over every time slot.

    Args:
        X: (nrows, n_time_slots) data; only the number of columns is used.
            May be None (as passed by `create`), in which case a weekly
            calendar with hourly slots (7 * 24 columns) is assumed.
        y: ignored.

    Returns:
        The fitted model.

    """
    # NOTE(review): the 7 * 24 fallback assumes the package's default
    # weekly/hourly vocabulary; confirm.
    n_time_slots = 7 * 24 if X is None else X.shape[1]

    self.n_components = 1
    self.components_ = np.full((self.n_components, n_time_slots), 1 / n_time_slots)

    return self

from_prior(prior) classmethod

Return a dummy model from a prior.

Parameters:

Name Type Description Default
prior ndarray | Series

prior probability weights over time slots. Can be a numpy array of shape (n_time_slots,) or a segment Series (e.g. from create_box_segment) with a FULL_VOCAB-compatible index.

required

Returns:

Type Description
DummyModel

DummyModel with a single component defined by the prior.

Example

Build a model that concentrates on weekday mornings:

from latent_calendar import DummyModel
from latent_calendar.segments import create_box_segment

mornings = create_box_segment(
    day_start=0, day_end=5, hour_start=7, hour_end=10,
    name="Weekday mornings",
)
model = DummyModel.from_prior(mornings)
sampler = model.create_sampler(random_state=0)
df_weights, df_events = sampler.sample(n_samples=[20, 30, 15])
Source code in latent_calendar/model/latent_calendar.py
@classmethod
def from_prior(cls, prior: np.ndarray | pd.Series) -> "DummyModel":
    """Return a dummy model from a prior.

    Args:
        prior: prior probability weights over time slots. Can be a numpy
            array of shape (n_time_slots,) or a segment Series (e.g. from
            `create_box_segment`) with a FULL_VOCAB-compatible index.

    Returns:
        DummyModel with a single component defined by the prior.

    Example:
        Build a model that concentrates on weekday mornings:

        ```python
        from latent_calendar import DummyModel
        from latent_calendar.segments import create_box_segment

        mornings = create_box_segment(
            day_start=0, day_end=5, hour_start=7, hour_end=10,
            name="Weekday mornings",
        )
        model = DummyModel.from_prior(mornings)
        sampler = model.create_sampler(random_state=0)
        df_weights, df_events = sampler.sample(n_samples=[20, 30, 15])
        ```

    """
    if isinstance(prior, pd.Series):
        prior = prior.to_numpy()

    model = cls()
    model.components_ = prior[np.newaxis, :]
    model.n_components = 1

    return model

from_segments(df_segments, weights=None) classmethod

Return a multi-component model where each segment is one component.

Each row of df_segments becomes one component in the model. The population-level mixture over components is derived from component_distribution_ — by default this weights components proportionally to the number of active slots in each segment. Pass explicit weights to override this.

Parameters:

Name Type Description Default
df_segments DataFrame

segments DataFrame in wide format, shape (n_segments, n_time_slots), e.g. from stack_segments.

required
weights ndarray | list[float] | None

optional 1-D array of length n_segments. Scales each component's contribution to the population prior. If None, weighting is proportional to active slot count per segment.

None

Returns:

Type Description
DummyModel

DummyModel with one component per segment.

Example
from latent_calendar import DummyModel
from latent_calendar.segments import create_box_segment, stack_segments

mornings = create_box_segment(
    day_start=0, day_end=5, hour_start=7, hour_end=10, name="Mornings"
)
evenings = create_box_segment(
    day_start=0, day_end=5, hour_start=18, hour_end=22, name="Evenings"
)
df_segments = stack_segments([mornings, evenings])

# Default weighting: proportional to active slots per segment
model = DummyModel.from_segments(df_segments)

# Mornings 3x more likely than evenings
model = DummyModel.from_segments(df_segments, weights=[3, 1])

sampler = model.create_sampler(random_state=0)
df_weights, df_events = sampler.sample(n_samples=[10, 20, 15])
Source code in latent_calendar/model/latent_calendar.py
@classmethod
def from_segments(
    cls,
    df_segments: "pd.DataFrame",
    weights: "np.ndarray | list[float] | None" = None,
) -> "DummyModel":
    """Return a multi-component model where each segment is one component.

    Each row of `df_segments` becomes one component in the model. The
    population-level mixture over components is derived from
    `component_distribution_` — by default this weights components
    proportionally to the number of active slots in each segment. Pass
    explicit `weights` to override this.

    Args:
        df_segments: segments DataFrame in wide format, shape
            (n_segments, n_time_slots), e.g. from `stack_segments`.
        weights: optional 1-D array of length n_segments. Scales each
            component's contribution to the population prior. If None,
            weighting is proportional to active slot count per segment.

    Returns:
        DummyModel with one component per segment.

    Raises:
        ValueError: if `weights` is not 1-D with one entry per segment.

    Example:
        ```python
        from latent_calendar import DummyModel
        from latent_calendar.segments import create_box_segment, stack_segments

        mornings = create_box_segment(
            day_start=0, day_end=5, hour_start=7, hour_end=10, name="Mornings"
        )
        evenings = create_box_segment(
            day_start=0, day_end=5, hour_start=18, hour_end=22, name="Evenings"
        )
        df_segments = stack_segments([mornings, evenings])

        # Default weighting: proportional to active slots per segment
        model = DummyModel.from_segments(df_segments)

        # Scale the morning segment 3x relative to the evening segment
        model = DummyModel.from_segments(df_segments, weights=[3, 1])

        sampler = model.create_sampler(random_state=0)
        df_weights, df_events = sampler.sample(n_samples=[10, 20, 15])
        ```

    """
    components = df_segments.to_numpy().astype(float)

    if weights is not None:
        weights = np.asarray(weights, dtype=float)
        # Without this check a length-1 `weights` would broadcast over
        # every segment instead of being reported as a mistake.
        if weights.shape != (components.shape[0],):
            raise ValueError(
                "weights must be 1-D with one entry per segment: "
                f"expected shape ({components.shape[0]},), got {weights.shape}"
            )
        # Scale each row (segment) by its weight.
        components = components * weights[:, np.newaxis]

    model = cls()
    model.components_ = components
    model.n_components = len(df_segments)

    return model

transform(X, y=None)

Everyone has equal probability of being in each group.

Source code in latent_calendar/model/latent_calendar.py
def transform(self, X, y=None) -> np.ndarray:
    """Assign every row the same membership weight for each component.

    Returns:
        (len(X), n_components) array filled with 1 / n_components.
    """
    memberships = np.ones((len(X), self.n_components))
    memberships /= self.n_components

    return memberships

LatentCalendar

Bases: LatentDirichletAllocation

Model weekly calendar data as a mixture of multinomial distributions.

Adapted from sklearn's Latent Dirichlet Allocation model.

Provides a predict method that returns the marginal probability of each time slot for a given row and a transform method that returns the latent representation of each row.

Source code in latent_calendar/model/latent_calendar.py
class LatentCalendar(BaseLDA):
    """Mixture-of-multinomials model for weekly calendar data.

    Built on sklearn's [Latent Dirichlet Allocation](https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.LatentDirichletAllocation.html) model.

    `transform` gives the latent representation of each row and `predict`
    gives the marginal probability of each time slot for each row.

    """

    @property
    def component_distribution_(self) -> np.ndarray:
        """Share of the total component mass held by each component."""
        mass_per_component = self.components_.sum(axis=1)
        return mass_per_component / self.components_.sum()

    @property
    def normalized_components_(self) -> np.ndarray:
        """Components rescaled so that each row sums to 1."""
        row_totals = self.components_.sum(axis=1, keepdims=True)
        return self.components_ / row_totals

    def joint_distribution(self, X_latent: np.ndarray) -> np.ndarray:
        """Combine latent weights with the normalized components."""
        slot_profiles = self.normalized_components_
        return joint_distribution(X_latent=X_latent, components=slot_profiles)

    def predict(self, X: np.ndarray, y=None) -> np.ndarray:
        r"""Return the marginal time-slot probabilities for each row.

        The components are marginalized out with the law of total probability:

        $$P[time=t | Row=r] = \sum_{l=0}^{c} P[time=t | L=l, Row=r] * P[L=l | Row=r]$$

        """
        # transform -> (n, n_components) latent weights per row
        return self.joint_distribution(X_latent=self.transform(X))

    def create_sampler(
        self,
        random_state: int | None = None,
        concentration_scale: float = 1.0,
    ):
        """Build a sampler that generates synthetic calendar data from this model.

        Args:
            random_state: seed for reproducibility
            concentration_scale: scale for Gamma-perturbing each user's Dirichlet
                concentration before sampling mixture weights. The default of
                1.0 means no perturbation, so every user draws from the fixed
                population prior. Values > 1.0 increase the variance of the
                mixture weights across users.

        Returns:
            LatentCalendarSampler bound to this fitted model

        Example:
            >>> model = LatentCalendar(n_components=5).fit(X)
            >>> sampler = model.create_sampler(random_state=42)
            >>> df_weights, df_events = sampler.sample(n_samples=[10, 5, 20])

        """
        # Imported here rather than at module level, as in the original.
        from latent_calendar.generate import LatentCalendarSampler

        sampler = LatentCalendarSampler(
            self,
            random_state=random_state,
            concentration_scale=concentration_scale,
        )
        return sampler

component_distribution_ property

Population frequency of each component.

normalized_components_ property

Components that each sum to 1.

create_sampler(random_state=None, concentration_scale=1.0)

Create a sampler for generating synthetic calendar data.

Parameters:

Name Type Description Default
random_state int | None

seed for reproducibility

None
concentration_scale float

scale for Gamma-perturbing each user's Dirichlet concentration before sampling mixture weights. 1.0 (default) means no perturbation — each user draws from the fixed population prior. Values > 1.0 increase variance across users' mixture weights.

1.0

Returns:

Type Description

LatentCalendarSampler bound to this fitted model

Example

>>> model = LatentCalendar(n_components=5).fit(X)
>>> sampler = model.create_sampler(random_state=42)
>>> df_weights, df_events = sampler.sample(n_samples=[10, 5, 20])

Source code in latent_calendar/model/latent_calendar.py
def create_sampler(
    self,
    random_state: int | None = None,
    concentration_scale: float = 1.0,
):
    """Build a sampler that generates synthetic calendar data from this model.

    Args:
        random_state: seed for reproducibility
        concentration_scale: scale for Gamma-perturbing each user's Dirichlet
            concentration before sampling mixture weights. The default of
            1.0 means no perturbation, so every user draws from the fixed
            population prior. Values > 1.0 increase the variance of the
            mixture weights across users.

    Returns:
        LatentCalendarSampler bound to this fitted model

    Example:
        >>> model = LatentCalendar(n_components=5).fit(X)
        >>> sampler = model.create_sampler(random_state=42)
        >>> df_weights, df_events = sampler.sample(n_samples=[10, 5, 20])

    """
    # Imported here rather than at module level, as in the original.
    from latent_calendar.generate import LatentCalendarSampler

    sampler = LatentCalendarSampler(
        self,
        random_state=random_state,
        concentration_scale=concentration_scale,
    )
    return sampler

joint_distribution(X_latent)

Marginalize out the components.

Source code in latent_calendar/model/latent_calendar.py
def joint_distribution(self, X_latent: np.ndarray) -> np.ndarray:
    """Combine latent weights with this model's normalized components."""
    slot_profiles = self.normalized_components_
    return joint_distribution(X_latent=X_latent, components=slot_profiles)

predict(X, y=None)

Return the marginal probabilities for a given row.

Marginalize out the loads via law of total probability

\[P[time=t | Row=r] = \sum_{l=0}^{c} P[time=t | L=l, Row=r] * P[L=l | Row=r]\]
Source code in latent_calendar/model/latent_calendar.py
def predict(self, X: np.ndarray, y=None) -> np.ndarray:
    r"""Return the marginal time-slot probabilities for each row.

    The components are marginalized out with the law of total probability:

    $$P[time=t | Row=r] = \sum_{l=0}^{c} P[time=t | L=l, Row=r] * P[L=l | Row=r]$$

    """
    # transform -> (n, n_components) latent weights per row
    return self.joint_distribution(X_latent=self.transform(X))

MarginalModel

Bases: LatentCalendar

Source code in latent_calendar/model/latent_calendar.py
class MarginalModel(LatentCalendar):
    """Single-component model whose component is the column sum of the data."""

    def fit(self, X, y=None) -> "MarginalModel":
        """Store the per-time-slot totals of X as the only component."""
        column_totals = X.sum(axis=0)

        self.n_components = 1
        # (1, n_times)
        self.components_ = column_totals[None, :]

        return self

    def transform(self, X, y=None) -> np.ndarray:
        """Put every row fully in the single component."""
        # (nrows, 1) column of integer ones
        return np.ones(len(X), dtype=int)[:, None]

fit(X, y=None)

Just sum over all the rows.

Source code in latent_calendar/model/latent_calendar.py
def fit(self, X, y=None) -> "MarginalModel":
    """Store the per-time-slot totals of X as the only component."""
    column_totals = X.sum(axis=0)

    self.n_components = 1
    # (1, n_times)
    self.components_ = column_totals[None, :]

    return self

transform(X, y=None)

There is only one component to be a part of.

Source code in latent_calendar/model/latent_calendar.py
def transform(self, X, y=None) -> np.ndarray:
    """Put every row fully in the single component."""
    # (nrows, 1) column of integer ones
    return np.ones(len(X), dtype=int)[:, None]

constant_prior(X, value=1.0)

Return the prior for each hour of the day.

Every time slot gets the same constant value.

Parameters:

Name Type Description Default
X ndarray

(nrows, n_times)

required
Source code in latent_calendar/model/latent_calendar.py
def constant_prior(X: np.ndarray, value: float = 1.0) -> np.ndarray:
    """Return a flat prior with the same value for every time slot.

    Only the number of columns of X is used.

    Args:
        X: (nrows, n_times)
        value: the constant assigned to each time slot.

    Returns:
        (n_times,)
    """
    n_time_slots = X.shape[1]
    return np.full(n_time_slots, value)

hourly_prior(X)

Return the prior for each hour of the day.

This is the fraction of rows with a positive value in each time slot.

Parameters:

Name Type Description Default
X ndarray

(nrows, n_times)

required

Returns:

Type Description
ndarray

(n_times,)

Source code in latent_calendar/model/latent_calendar.py
def hourly_prior(X: np.ndarray) -> np.ndarray:
    """Return the prior for each hour of the day.

    For every time slot this is the fraction of rows whose value in that
    slot is greater than zero.

    Args:
        X: (nrows, n_times)

    Returns:
        (n_times,)

    """
    is_active = X > 0
    return is_active.mean(axis=0)

joint_distribution(X_latent, components)

Marginalize out the components.

Source code in latent_calendar/model/latent_calendar.py
def joint_distribution(X_latent: np.ndarray, components: np.ndarray) -> np.ndarray:
    """Combine per-row latent weights with the components by matrix product."""
    return np.matmul(X_latent, components)

predict_on_dataframe(df, model)

Small wrapper to predict on DataFrame and keep same columns and index.

Source code in latent_calendar/model/utils.py
def predict_on_dataframe(df: pd.DataFrame, model: LatentCalendar) -> pd.DataFrame:
    """Predict on a DataFrame and return the result with the same columns and index."""
    predictions = model.predict(df.to_numpy())
    return pd.DataFrame(predictions, columns=df.columns, index=df.index)

transform_on_dataframe(df, model)

Small wrapper to transform on DataFrame and keep index.

Source code in latent_calendar/model/utils.py
6
7
8
def transform_on_dataframe(df: pd.DataFrame, model: LatentCalendar) -> pd.DataFrame:
    """Transform a DataFrame and return the latent weights with the same index."""
    latent = model.transform(df.to_numpy())
    return pd.DataFrame(latent, index=df.index)

Comments