Ancestor Regression (AncReg) is a package with methods to test for ancestral connections in linear structural equation models (C. Schultheiss and Bühlmann (2023)) and structural vector autoregressive models (Christoph Schultheiss, Ulmer, and Bühlmann (2025)). Ancestor Regression provides explicit error control for false causal discovery, at least asymptotically. To have power, however, it relies on non-Gaussian distributions.
To install the Ancestor Regression R package from CRAN, just run
install.packages(AncReg)
You can install the development version of AncReg from GitHub with:
# install.packages("devtools")
::install_github("markusul/AncReg") devtools
or
# install.packages('pak')
::pkg_install('markusul/AncReg') pak
This is a basic example on how to use Ancestor Regression using some simulated data.
library(AncReg)
# random DAGS for simulation
set.seed(42)
<- 5 #number of nodes
p <- pcalg::randomDAG(p, prob = 0.5)
DAG
<- matrix(0, p, p) # represent DAG as matrix
B for (i in 2:p){
for(j in 1:(i-1)){
# store edge weights
<- max(0, DAG@edgeData@data[[paste(j,"|",i, sep="")]]$weight)
B[i,j]
}
}colnames(B) <- rownames(B) <- LETTERS[1:p]
# solution in terms of noise
<- MASS::ginv(diag(p) - B)
Bprime
<- 5000
n <- matrix(rexp(n * p), ncol = p)
N <- t(Bprime %*% t(N))
X colnames(X) <- LETTERS[1:p]
# fit Ancestor Regression
<- AncReg(X)
fit #> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
fit#> $z.val
#> A.0 B.0 C.0 D.0 E.0
#> A 48.7380204 0.2228364 -0.8016602 0.5490716 -0.4341680
#> B -5.3286343 59.1036440 -1.2511824 -1.2682775 0.4165155
#> C -4.5000966 -8.4766371 53.2254962 0.1683882 1.9111938
#> D -5.0924716 -0.8902521 -1.0268923 75.1415659 0.8641796
#> E -0.2529153 -3.8052342 -1.0547642 1.8153614 61.9014974
#>
#> $p.val
#> A.0 B.0 C.0 D.0 E.0
#> A 0.000000e+00 8.236628e-01 0.4227496 0.58295633 0.66416645
#> B 9.895400e-08 0.000000e+00 0.2108679 0.20469888 0.67703283
#> C 6.792260e-06 2.317966e-17 0.0000000 0.86627792 0.05597968
#> D 3.534257e-07 3.733305e-01 0.3044712 0.00000000 0.38748922
#> E 8.003337e-01 1.416701e-04 0.2915332 0.06946839 0.00000000
#>
#> attr(,"class")
#> [1] "AncReg"
The summary function can be used to collect and organize the p-values. Additionally it returns estimated ancestral graphs.
# collect ancestral p-values and graph
<- summary(fit)
res
res#> $p.val
#> A B C D E
#> A 1.000000e+00 8.236628e-01 0.4227496 0.58295633 0.66416645
#> B 9.895400e-08 1.000000e+00 0.2108679 0.20469888 0.67703283
#> C 6.792260e-06 2.317966e-17 1.0000000 0.86627792 0.05597968
#> D 3.534257e-07 3.733305e-01 0.3044712 1.00000000 0.38748922
#> E 8.003337e-01 1.416701e-04 0.2915332 0.06946839 1.00000000
#>
#> $graph
#> A B C D E
#> A FALSE FALSE FALSE FALSE FALSE
#> B TRUE FALSE FALSE FALSE FALSE
#> C TRUE TRUE FALSE FALSE FALSE
#> D TRUE FALSE FALSE FALSE FALSE
#> E TRUE TRUE FALSE FALSE FALSE
#>
#> $alpha
#> [1] 0.05
#>
#> attr(,"class")
#> [1] "summary.AncReg"
As we know the truth in the simulated model, we can compare the estimated ancestral graph with the true one.
#compare true and estimated ancestral graph
<- igraph::graph_from_adjacency_matrix(recAncestor(B != 0))
trueGraph <- igraph::graph_from_adjacency_matrix(res$graph)
ancGraph
#same layout for both graphs
<- igraph::layout_as_tree(trueGraph)
l
par(mfrow = c(1, 2))
plot(trueGraph, main = 'true ancestral graph', vertex.size = 30, layout = l)
plot(ancGraph, main = 'Ancestor Regression', vertex.size = 30, layout = l)
We show an example of a SVAR application using the time series of geyser eruptions. (Christoph Schultheiss, Ulmer, and Bühlmann (2025))
<- MASS::geyser
geyser # shift waiting such that it is waiting after erruption
<- data.frame(waiting = geyser$waiting[-1], duration = geyser$duration[-nrow(geyser)])
geyser2
# fit ancestor regression with 6 lags considered
<- AncReg(as.matrix(geyser2), degree = 6)
fit2 <- summary(fit2)
res2
res2#> $inst.p.val
#> waiting duration
#> waiting 1.0000000 0.0004811719
#> duration 0.5109396 1.0000000000
#>
#> $inst.graph
#> waiting duration
#> waiting FALSE TRUE
#> duration FALSE FALSE
#>
#> $inst.alpha
#> [1] 0.05
#>
#> $sum.p.val
#> waiting duration
#> waiting 1.0000000 0.008733271
#> duration 0.1760936 1.000000000
#>
#> $sum.graph
#> waiting duration
#> waiting FALSE TRUE
#> duration FALSE FALSE
#>
#> attr(,"class")
#> [1] "summary.AncReg"
par(mfrow = c(1, 2))
# visualize instantaneous ancestry
<- igraph::graph_from_adjacency_matrix(res2$inst.graph)
instGraph <- igraph::layout_as_tree(instGraph)
l plot(instGraph, edge.label = round(diag(res2$inst.p.val[1:2, 2:1]), 2),
main = 'instantaneous effects', vertex.size = 90, layout = l)
# visualize summary of lagged ancestry
<- igraph::graph_from_adjacency_matrix(res2$sum.graph)
sumGraph plot(sumGraph, edge.label = round(diag(res2$sum.p.val[1:2, 2:1]), 2),
main = 'summary graph', vertex.size = 90, layout = l)