Back to Statsmodels

Mediation Survival

examples/notebooks/mediation_survival.ipynb

0.15.0.dev02.5 KB
Original Source

Mediation analysis with duration data

This notebook demonstrates mediation analysis when the mediator and outcome are duration variables, modeled using proportional hazards regression. These examples are based on simulated data.

python
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.mediation import Mediation

Make the notebook reproducible.

python
np.random.seed(3424)

Specify a sample size.

python
n = 1000

Generate an exposure variable.

python
exp = np.random.normal(size=n)

Generate a mediator variable.

python
def gen_mediator():
    mn = np.exp(exp)
    mtime0 = -mn * np.log(np.random.uniform(size=n))
    ctime = -2 * mn * np.log(np.random.uniform(size=n))
    mstatus = (ctime >= mtime0).astype(int)
    mtime = np.where(mtime0 <= ctime, mtime0, ctime)
    return mtime0, mtime, mstatus

Generate an outcome variable.

python
def gen_outcome(otype, mtime0):
    if otype == "full":
        lp = 0.5 * mtime0
    elif otype == "no":
        lp = exp
    else:
        lp = exp + mtime0
    mn = np.exp(-lp)
    ytime0 = -mn * np.log(np.random.uniform(size=n))
    ctime = -2 * mn * np.log(np.random.uniform(size=n))
    ystatus = (ctime >= ytime0).astype(int)
    ytime = np.where(ytime0 <= ctime, ytime0, ctime)
    return ytime, ystatus

Build a dataframe containing all the relevant variables.

python
def build_df(ytime, ystatus, mtime0, mtime, mstatus):
    df = pd.DataFrame(
        {
            "ytime": ytime,
            "ystatus": ystatus,
            "mtime": mtime,
            "mstatus": mstatus,
            "exp": exp,
        }
    )
    return df

Run the full simulation and analysis, under a particular population structure of mediation.

python
def run(otype):

    mtime0, mtime, mstatus = gen_mediator()
    ytime, ystatus = gen_outcome(otype, mtime0)
    df = build_df(ytime, ystatus, mtime0, mtime, mstatus)

    outcome_model = sm.PHReg.from_formula(
        "ytime ~ exp + mtime", status="ystatus", data=df
    )
    mediator_model = sm.PHReg.from_formula("mtime ~ exp", status="mstatus", data=df)

    med = Mediation(
        outcome_model,
        mediator_model,
        "exp",
        "mtime",
        outcome_predict_kwargs={"pred_only": True},
    )
    med_result = med.fit(n_rep=20)
    print(med_result.summary())

Run the example with full mediation

python
run("full")

Run the example with partial mediation

python
run("partial")

Run the example with no mediation

python
run("no")