## ----include = FALSE----------------------------------------------------------
# Document-wide knitr chunk defaults: figure size 7 x 5 (width x height),
# printed output prefixed with "#>", and source/output collapsed together.
knitr::opts_chunk$set(
	fig.width = 7,
	fig.height = 5,
	comment = "#>",
	collapse = TRUE
)

## ----setup--------------------------------------------------------------------
library(xplainfi)
library(DiagrammeR)
library(mlr3learners)
# Seed the random number generator so the simulations below are reproducible.
set.seed(seed = 123)

## ----overview-table, echo=FALSE-----------------------------------------------
# Overview of the simulated data-generating processes (DGPs): one row per
# sim_dgp_* function, with the challenge it poses and the behavior expected
# from PFI and CFI. check.names = FALSE keeps the column names that contain
# spaces exactly as written.
dgp_overview <- data.frame(
	DGP = paste0(
		"sim_dgp_",
		c("correlated", "mediated", "confounded", "interactions", "independent", "ewald")
	),
	Challenge = c(
		"Spurious correlation",
		"Mediation effects",
		"Confounding",
		"Interaction effects",
		"Baseline (no challenges)",
		"Mixed effects"
	),
	"PFI Behavior" = c(
		"High for spurious x2",
		"Shows total effects",
		"Biased upward",
		"Low (no main effects)",
		"Accurate",
		"Mixed"
	),
	"CFI Behavior" = c(
		"Low for spurious x2",
		"Shows direct effects",
		"Less biased",
		"High (captures interactions)",
		"Accurate",
		"Mixed"
	),
	check.names = FALSE
)
# Render the overview data frame as a captioned table.
knitr::kable(
	x = dgp_overview,
	caption = "Overview of simulation settings and expected method behavior"
)

## ----dag-correlated, echo=FALSE, fig.cap="DAG for correlated features DGP", fig.width=10, fig.height=4----
# Left-to-right DAG for the correlated-features DGP:
# X1 (2.0) and X3 (1.0) point to Y with solid edges, X2 and X4 are drawn with
# dashed gray edges, and a bold red edge X1 -> X2 carries the correlation label.
DiagrammeR::grViz(
	diagram = "
  digraph Correlated {
    rankdir=LR;
    graph [ranksep=1.5];
    node [shape=circle, style=filled, fontsize=14, width=1.2];
    
    X1 [fillcolor='lightcoral', label='X₁\n(β=2.0)'];
    X2 [fillcolor='pink', label='X₂\n(β=0)'];
    X3 [fillcolor='lightblue', label='X₃\n(β=1.0)'];
    X4 [fillcolor='lightgray', label='X₄\n(β=0)'];
    Y [fillcolor='greenyellow', label='Y', width=1.5];
    
    X1 -> X2 [color=red, style=bold, label='r≈0.9'];
    X1 -> Y [label='2.0'];
    X2 -> Y [style=dashed, color=gray, label='0'];
    X3 -> Y [label='1.0'];
    X4 -> Y [style=dashed, color=gray];
    
    {rank=source; X1; X3; X4}
    {rank=same; X2}
    {rank=sink; Y}
  }"
)

## ----correlated-example-------------------------------------------------------
# Simulate the correlated-features DGP with n = 500
set.seed(seed = 123)
task <- sim_dgp_correlated(n = 500)

# Correlation matrix of x1 and x2, to see how strongly the two are related
stats::cor(x = task$data()[, c("x1", "x2")])

# True coefficients are x1 = 2.0, x2 = 0, x3 = 1.0, x4 = 0,
# i.e. x2 is highly correlated with x1 yet has NO causal effect of its own.

## ----dag-mediated, echo=FALSE, fig.cap="DAG for mediated effects DGP", fig.width=10, fig.height=4----
# Left-to-right DAG for the mediated-effects DGP:
# Exposure -> Mediator (0.8), Direct -> Mediator (0.6), Direct -> Y (0.5),
# Mediator -> Y (1.5); Noise is linked to Y by an unlabelled dashed gray edge.
DiagrammeR::grViz(
	diagram = "
  digraph Mediated {
    rankdir=LR;
    graph [ranksep=1.2];
    node [shape=circle, style=filled, fontsize=14, width=1.2];
    
    E [fillcolor='orange', label='Exposure\n(β=0)'];
    D [fillcolor='lightblue', label='Direct\n(β=0.5)'];
    M [fillcolor='yellow', label='Mediator\n(β=1.5)'];
    N [fillcolor='lightgray', label='Noise\n(β=0)'];
    Y [fillcolor='greenyellow', label='Y', width=1.5];
    
    E -> M [label='0.8', color=purple, penwidth=2];
    D -> M [label='0.6', color=blue];
    D -> Y [label='0.5', color=blue];
    M -> Y [label='1.5', color=purple, penwidth=2];
    N -> Y [style=dashed, color=gray];
    
    {rank=source; E; D; N}
    {rank=same; M}
    {rank=sink; Y}
  }"
)

## ----mediated-example---------------------------------------------------------
# Simulate the mediated-effects DGP with n = 500
set.seed(seed = 123)
task <- sim_dgp_mediated(n = 500)

# Effect of exposure on Y (worked out by hand from the path coefficients,
# nothing is computed here):
#   total effect  = 0.8 * 1.5 = 1.2 (through mediator)
#   direct effect = 0 (no direct path to Y)

## ----dag-confounded, echo=FALSE, fig.cap="DAG for confounding DGP", fig.width=10, fig.height=5----
# Left-to-right DAG for the confounding DGP:
# the confounder H (dashed outline) points to X1, Proxy and Y, each with 1.0;
# X1 and Independent each point to Y with 1.0; Proxy has no edge into Y.
DiagrammeR::grViz(
	diagram = "
  digraph Confounded {
    rankdir=LR;
    graph [ranksep=1.2, nodesep=0.8];
    node [shape=circle, style=filled, fontsize=14, width=1.2];

    H [fillcolor='red', label='H\n(Confounder)', style='filled,dashed'];
    X1 [fillcolor='lightcoral', label='X₁\n(β=1.0)'];
    P [fillcolor='pink', label='Proxy\n(β=0)'];
    I [fillcolor='lightblue', label='Independent\n(β=1.0)'];
    Y [fillcolor='greenyellow', label='Y', width=1.5];

    H -> X1 [color=red, label='1.0'];
    H -> P [color=red, style=dashed, label='1.0'];
    H -> Y [color=red, label='1.0', penwidth=2];
    X1 -> Y [label='1.0'];
    I -> Y [label='1.0'];

    {rank=source; H}
    {rank=same; X1; P; I}
    {rank=sink; Y}
  }"
)

## ----confounded-example-------------------------------------------------------
set.seed(seed = 123)

# Scenario 1 (default): the confounder is hidden
task_hidden <- sim_dgp_confounded(hidden = TRUE, n = 500)
# Feature names: proxy available but not confounder
task_hidden$feature_names

# Scenario 2: the confounder is observable
task_observed <- sim_dgp_confounded(hidden = FALSE, n = 500)
# Feature names: both confounder and proxy available
task_observed$feature_names

## ----dag-interactions, echo=FALSE, fig.cap="DAG for interaction effects DGP", fig.width=10, fig.height=4----
# Left-to-right DAG for the interaction-effects DGP:
# X1 and X2 feed a diamond-shaped interaction node that points to Y (2.0),
# X3 points to Y directly (1.0), and N1/N2 are drawn with dashed gray edges.
DiagrammeR::grViz(
	diagram = "
  digraph Interaction {
    rankdir=LR;
    graph [ranksep=1.2];
    node [shape=circle, style=filled, fontsize=14, width=1.2];
    
    X1 [fillcolor='orange', label='X₁\n(β=0)'];
    X2 [fillcolor='orange', label='X₂\n(β=0)'];
    X3 [fillcolor='lightblue', label='X₃\n(β=1.0)'];
    N1 [fillcolor='lightgray', label='N₁\n(β=0)'];
    N2 [fillcolor='lightgray', label='N₂\n(β=0)'];
    Y [fillcolor='greenyellow', label='Y', width=1.5];
    INT [fillcolor='red', shape=diamond, label='X₁×X₂\n(β=2.0)', width=1.5];
    
    X1 -> INT [color=red, penwidth=2];
    X2 -> INT [color=red, penwidth=2];
    INT -> Y [color=red, label='2.0', penwidth=2];
    X3 -> Y [label='1.0'];
    N1 -> Y [style=dashed, color=gray];
    N2 -> Y [style=dashed, color=gray];
    
    {rank=source; X1; X2; X3; N1; N2}
    {rank=same; INT}
    {rank=sink; Y}
  }"
)

## ----interactions-example-----------------------------------------------------
# Simulate the interaction-effects DGP with n = 500
set.seed(seed = 123)
task <- sim_dgp_interactions(n = 500)

# Note: neither X1 nor X2 has a main effect;
# their importance comes ONLY through their interaction.

## ----dag-independent, echo=FALSE, fig.cap="DAG for independent features DGP", fig.width=10, fig.height=4----
# Left-to-right DAG for the independent-features DGP:
# X1 (2.0), X2 (1.0) and X3 (0.5) each point straight to Y, with thinner edges
# for smaller coefficients; N1/N2 are drawn with dashed gray edges.
DiagrammeR::grViz(
	diagram = "
  digraph Independent {
    rankdir=LR;
    graph [ranksep=1.5];
    node [shape=circle, style=filled, fontsize=14, width=1.2];
    
    X1 [fillcolor='lightblue', label='X₁\n(β=2.0)'];
    X2 [fillcolor='lightblue', label='X₂\n(β=1.0)'];
    X3 [fillcolor='lightblue', label='X₃\n(β=0.5)'];
    N1 [fillcolor='lightgray', label='N₁\n(β=0)'];
    N2 [fillcolor='lightgray', label='N₂\n(β=0)'];
    Y [fillcolor='greenyellow', label='Y', width=1.5];
    
    X1 -> Y [label='2.0', penwidth=3];
    X2 -> Y [label='1.0', penwidth=2];
    X3 -> Y [label='0.5'];
    N1 -> Y [style=dashed, color=gray];
    N2 -> Y [style=dashed, color=gray];
    
    {rank=source; X1; X2; X3; N1; N2}
    {rank=sink; Y}
  }"
)

## ----independent-example------------------------------------------------------
# Simulate the independent-features DGP with n = 500
set.seed(seed = 123)
task <- sim_dgp_independent(n = 500)

# Expected ranking, consistent across all methods:
# important1 > important2 > important3 > unimportant1,2 (approx. 0)

## ----dag-ewald, echo=FALSE, fig.cap="DAG for Ewald et al. (2024) DGP", fig.width=10, fig.height=4----
# Left-to-right DAG for the Ewald et al. (2024) DGP:
# gray edges X1 -> X2 and X3 -> X4; X4 and X5 each point to Y (1.0) and also
# feed a diamond-shaped interaction node that points to Y (1.0).
# X1, X2 and X3 have no edge into Y.
DiagrammeR::grViz(
	diagram = "
  digraph Ewald {
    rankdir=LR;
    graph [ranksep=1.2];
    node [shape=circle, style=filled, fontsize=14, width=1.2];
    
    X1 [fillcolor='lightgray', label='X₁\n(β=0)'];
    X2 [fillcolor='lightgray', label='X₂\n(β=0)'];
    X3 [fillcolor='lightgray', label='X₃\n(β=0)'];
    X4 [fillcolor='lightblue', label='X₄\n(β=1.0)'];
    X5 [fillcolor='lightblue', label='X₅\n(β=1.0)'];
    Y [fillcolor='greenyellow', label='Y', width=1.5];
    INT [fillcolor='red', shape=diamond, label='X₄×X₅\n(β=1.0)', width=1.5];
    
    X1 -> X2 [color=gray, label='≈1.0'];
    X3 -> X4 [color=gray, label='≈1.0'];
    X4 -> Y [label='1.0'];
    X5 -> Y [label='1.0'];
    X4 -> INT [color=red];
    X5 -> INT [color=red];
    INT -> Y [color=red, label='1.0'];
    
    {rank=source; X1; X3; X5}
    {rank=same; X2; X4}
    {rank=same; INT}
    {rank=sink; Y}
  }"
)

## ----ewald-example------------------------------------------------------------
# Re-seed first, like every other example chunk in this file, so this chunk
# produces the same simulated data when it is run on its own.
set.seed(123)
# Simulate the Ewald et al. (2024) DGP with n = 500. The result is not
# assigned, so it is auto-printed.
sim_dgp_ewald(n = 500)