# -------------------------------------------------------------------------------------------- #
# R SCRIPT FOR SARS-CoV-2 INFECTION FATALITY RISK ESTIMATES AND CONFIDENCE INTERVALS IN SPAIN #
# #
# This code reproduces the analyses performed in "Pastor-Barriuso R, Pérez-Gómez B, Hernán MA, #
# et al. Infection fatality risk for SARS-CoV-2: a nationwide seroepidemiological study in the #
# non-institutionalized population of Spain." #
# #
# -------------------------------------------------------------------------------------------- #
# --------------------------------------------------------------------------------------------
# Non-institutionalized population by sex and 10-year age group (0-9,..., 70-79, >=80 years)
# as of July 15, 2020 (Spanish National Institute of Statistics)
data <- matrix(c(0,0,46887075, # Overall
1,0,23006948, # All men
1,1, 2205512,
1,2, 2557872,
1,3, 2479132,
1,4, 2978715,
1,5, 3916706,
1,6, 3493827,
1,7, 2598212,
1,8, 1783664,
1,9, 993308,
6,0,23880127, # All women
6,1, 2078268,
6,2, 2396681,
6,3, 2404106,
6,4, 3012421,
6,5, 3877832,
6,6, 3563505,
6,7, 2803368,
6,8, 2138101,
6,9, 1605845),
ncol=3,byrow=T,dimnames=list(NULL,c("sex","age.cat","pop")))
# --------------------------------------------------------------------------------------------
# Seroprevalence of SARS-CoV-2 with chemiluminiscent microparticle immunoassay by sex and age group (ENE-COVID study, April 27–June 22, 2020)
# Note: Seropositivity for SARS-CoV-2 with immunoassay in at least one wave (61,092 participants with immunoassay in at least one wave)
# Note: Standard errors and 95% confidence intervals for SARS-CoV-2 seroprevalences accounted for the ENE-COVID complex survey design
prev <- matrix(c(0,0,0.0491966,0.0017192,0.0459315,0.0526809, # Overall
1,0,0.0480707,0.0020358,0.0442308,0.0522256, # All men
1,1,0.0324915,0.0085859,0.0192736,0.0542724,
1,2,0.0365506,0.0050564,0.0278275,0.0478735,
1,3,0.057643 ,0.00604 ,0.0468755,0.0707004,
1,4,0.0469151,0.0048431,0.0382781,0.0573847,
1,5,0.0533711,0.0040498,0.0459619,0.0618972,
1,6,0.0526742,0.0041081,0.0451732,0.0613407,
1,7,0.0489599,0.0046845,0.0405462,0.0590121,
1,8,0.0469499,0.0058422,0.0367305,0.059836 ,
1,9,0.0459105,0.008355 ,0.0320385,0.065383 ,
6,0,0.0502732,0.0019935,0.0465034,0.0543311, # All women
6,1,0.0423334,0.009991 ,0.0265354,0.0668907,
6,2,0.0438496,0.0055864,0.0341075,0.0562125,
6,3,0.057152 ,0.0061229,0.0462606,0.0704183,
6,4,0.0520345,0.0045337,0.0438257,0.0616816,
6,5,0.0533281,0.0040707,0.0458839,0.0619017,
6,6,0.0517485,0.0039211,0.0445749,0.0600041,
6,7,0.0500833,0.004588 ,0.0418114,0.0598894,
6,8,0.0462329,0.0052633,0.0369376,0.0577271,
6,9,0.0499314,0.0078711,0.036566 ,0.067838 ),
ncol=6,byrow=T,dimnames=list(NULL,c("sex","age.cat","prev","prev.se","prev.ll","prev.ul")))
# Merge population with seroprevalence data
data <- merge(data,prev,by=c("sex","age.cat"))
rm(prev)
# --------------------------------------------------------------------------------------------
# Sensitivity analysis: Correction of estimated SARS-CoV-2 seroprevalences for the immunoassay sensitivity and specificity
# Note: Pooled estimates of sensitivity and specificity of the immunoassay (SARS-CoV-2 IgG for use with ARCHITECT, Abbott Laboratories)
# against RT-PCR were obtained from a bivariate random-effects meta-analysis of 23 studies of diagnostic test accuracy
# Logit-transformed pooled estimates of sensitivity and specificity, together with their covariance matrix
logit.accur <- c(2.266413,4.897884)
names(logit.accur) <- c("sens","spec")
var.logit.accur <- matrix(c(0.018066444,0.004085381,
0.004085381,0.026223817),nrow=2,dimnames=list(c("sens","spec"),c("sens","spec")))
# Back-transform the estimates and covariance matrix to the original scale
accur <- exp(logit.accur)/(1+exp(logit.accur))
mat <- diag(exp(logit.accur)/(1+exp(logit.accur))^2)
var.accur <- mat%*%var.logit.accur%*%mat
rm(logit.accur,var.logit.accur,mat)
# Corrected seroprevalences for sensitivity and specificity
data$corr.prev <- (data$prev+accur[2]-1)/(accur[1]+accur[2]-1)
# Standard errors of corrected seroprevalences (delta method)
mat <- cbind(-data$corr.prev,1-data$corr.prev)
data$corr.prev.se <- sqrt(data$prev.se^2+diag(mat%*%var.accur%*%t(mat)))/(accur[1]+accur[2]-1)
rm(accur,var.accur,mat)
# 95% confidence intervals for corrected seroprevalences (based on logit transform)
data$corr.prev.ll <- 1/(1+exp(-(log(data$corr.prev/(1-data$corr.prev))-qt(0.975,1500-183)*data$corr.prev.se/(data$corr.prev*(1-data$corr.prev)))))
data$corr.prev.ul <- 1/(1+exp(-(log(data$corr.prev/(1-data$corr.prev))+qt(0.975,1500-183)*data$corr.prev.se/(data$corr.prev*(1-data$corr.prev)))))
# --------------------------------------------------------------------------------------------
# Estimated number of SARS-CoV-2 infections in non-institutionalized population by sex and age group
data$infect <- data$pop*data$prev
data$infect.se <- data$pop*data$prev.se
data$infect.ll <- data$pop*data$prev.ll
data$infect.ul <- data$pop*data$prev.ul
cbind(format(data$pop/1000,digits=1,nsmall=1,trim=T,big.mark=","), # Population (thousands)
paste(format(100*data$prev,digits=0,nsmall=1,trim=T)," (", # Seroprevalence (%) (95% CI)
format(100*data$prev.ll,digits=0,nsmall=1,trim=T)," to ",
format(100*data$prev.ul,digits=0,nsmall=1,trim=T),")",sep=""),
paste(format(data$infect/1000,digits=0,nsmall=1,trim=T,big.mark=",")," (", # Infections (thousands) (95% CI)
format(data$infect.ll/1000,digits=0,nsmall=1,trim=T,big.mark=",")," to ",
format(data$infect.ul/1000,digits=0,nsmall=1,trim=T,big.mark=","),")",sep=""))
data$corr.infect <- data$pop*data$corr.prev
data$corr.infect.se <- data$pop*data$corr.prev.se
data$corr.infect.ll <- data$pop*data$corr.prev.ll
data$corr.infect.ul <- data$pop*data$corr.prev.ul
cbind(format(data$pop/1000,digits=1,nsmall=1,trim=T,big.mark=","),
paste(format(100*data$corr.prev,digits=0,nsmall=1,trim=T)," (",
format(100*data$corr.prev.ll,digits=0,nsmall=1,trim=T)," to ",
format(100*data$corr.prev.ul,digits=0,nsmall=1,trim=T),")",sep=""),
paste(format(data$corr.infect/1000,digits=0,nsmall=1,trim=T,big.mark=",")," (",
format(data$corr.infect.ll/1000,digits=0,nsmall=1,trim=T,big.mark=",")," to ",
format(data$corr.infect.ul/1000,digits=0,nsmall=1,trim=T,big.mark=","),")",sep=""))
# --------------------------------------------------------------------------------------------
# Number of deaths with confirmed COVID-19 in non-institutionalized population up to July 15, 2020 by sex and age group (SiViES)
death <- matrix(c(0,0,19228, # Overall
1,0,12317, # All men
1,1,3,
1,2,3,
1,3,18,
1,4,48,
1,5,192,
1,6,705,
1,7,1904,
1,8,4145,
1,9,5299,
6,0,6911, # All women
6,1,2,
6,2,3,
6,3,17,
6,4,29,
6,5,103,
6,6,318,
6,7,749,
6,8,1986,
6,9,3704),
ncol=3,byrow=T,dimnames=list(NULL,c("sex","age.cat","death.covid")))
# Merge death data
data <- merge(data,death,by=c("sex","age.cat"))
rm(death)
format(data$death.covid,digits=1,nsmall=0,trim=T,big.mark=",")
# --------------------------------------------------------------------------------------------
# Number of excess all-cause deaths in non-institutionalized population by sex and age group (March 13 - May 22, 2020, MoMo)
death <- matrix(c(0,0,24778, # Overall
1,0,15480, # All men
1,1,32,
1,2,0,
1,3,0,
1,4,3,
1,5,168,
1,6,601,
1,7,2065,
1,8,5114,
1,9,7497,
6,0,9298, # All women
6,1,11,
6,2,22,
6,3,10,
6,4,71,
6,5,91,
6,6,369,
6,7,875,
6,8,2646,
6,9,5203),
ncol=3,byrow=T,dimnames=list(NULL,c("sex","age.cat","death.excess")))
# Merge death data
data <- merge(data,death,by=c("sex","age.cat"))
rm(death)
format(data$death.excess,digits=1,nsmall=0,trim=T,big.mark=",")
# --------------------------------------------------------------------------------------------
# Infection fatality risks in non-institutionalized population by sex and age group
# Note: CI based on delta methods accounting for both binomial variance in the number of deaths and estimated variance in the number of infections
# IFR based on estimated SARS-CoV-2 seroprevalence
ifr.covid <- data$death.covid/data$infect
logit.ifr.covid.var <- 1/data$death.covid + 1/(data$infect-data$death.covid) + data$infect.se^2/(data$infect-data$death.covid)^2
paste(format(100*ifr.covid,digits=0,nsmall=2,trim=T)," (",
format(100/(1+exp(-(log(ifr.covid/(1-ifr.covid))-qt(0.975,1500-183)*sqrt(logit.ifr.covid.var)))),digits=0,nsmall=2,trim=T)," to ",
format(100/(1+exp(-(log(ifr.covid/(1-ifr.covid))+qt(0.975,1500-183)*sqrt(logit.ifr.covid.var)))),digits=0,nsmall=2,trim=T),")",sep="")
death.excess <- data$death.excess + 0.5*(data$death.excess==0) # Continuity correction for IFR with zero deaths
infect <- data$infect + 1*(data$death.excess==0)
ifr.excess <- death.excess/infect
logit.ifr.excess.var <- 1/death.excess + 1/(infect-death.excess) + data$infect.se^2/(infect-death.excess)^2
rm(death.excess,infect)
paste(format(100*ifr.excess,digits=0,nsmall=2,trim=T)," (",
format(100/(1+exp(-(log(ifr.excess/(1-ifr.excess))-qt(0.975,1500-183)*sqrt(logit.ifr.excess.var)))),digits=0,nsmall=2,trim=T)," to ",
format(100/(1+exp(-(log(ifr.excess/(1-ifr.excess))+qt(0.975,1500-183)*sqrt(logit.ifr.excess.var)))),digits=0,nsmall=2,trim=T),")",sep="")
# IFR based on corrected prevalence of SARS-CoV-2 infection
corr.ifr.covid <- data$death.covid/data$corr.infect
logit.corr.ifr.covid.var <- 1/data$death.covid + 1/(data$corr.infect-data$death.covid) + data$corr.infect.se^2/(data$corr.infect-data$death.covid)^2
paste(format(100*corr.ifr.covid,digits=0,nsmall=2,trim=T)," (",
format(100/(1+exp(-(log(corr.ifr.covid/(1-corr.ifr.covid))-qt(0.975,1500-183)*sqrt(logit.corr.ifr.covid.var)))),digits=0,nsmall=2,trim=T)," to ",
format(100/(1+exp(-(log(corr.ifr.covid/(1-corr.ifr.covid))+qt(0.975,1500-183)*sqrt(logit.corr.ifr.covid.var)))),digits=0,nsmall=2,trim=T),")",sep="")
death.excess <- data$death.excess + 0.5*(data$death.excess==0) # Continuity correction for IFR with zero deaths
corr.infect <- data$corr.infect + 1*(data$death.excess==0)
corr.ifr.excess <- death.excess/corr.infect
logit.corr.ifr.excess.var <- 1/death.excess + 1/(corr.infect-death.excess) + data$corr.infect.se^2/(corr.infect-death.excess)^2
rm(death.excess,corr.infect)
paste(format(100*corr.ifr.excess,digits=0,nsmall=2,trim=T)," (",
format(100/(1+exp(-(log(corr.ifr.excess/(1-corr.ifr.excess))-qt(0.975,1500-183)*sqrt(logit.corr.ifr.excess.var)))),digits=0,nsmall=2,trim=T)," to ",
format(100/(1+exp(-(log(corr.ifr.excess/(1-corr.ifr.excess))+qt(0.975,1500-183)*sqrt(logit.corr.ifr.excess.var)))),digits=0,nsmall=2,trim=T),")",sep="")
# --------------------------------------------------------------------------------------------
# Figure 1: Infection fatality risks based on estimated SARS-CoV-2 seroprevalence
age <- c(seq(5,75,10),90)
ifr.covid.m <- ifr.covid[3:11]
logit.ifr.covid.m.var <- logit.ifr.covid.var[3:11]
ifr.covid.w <- ifr.covid[13:21]
logit.ifr.covid.w.var <- logit.ifr.covid.var[13:21]
ifr.excess.m <- ifr.excess[3:11]
logit.ifr.excess.m.var <- logit.ifr.excess.var[3:11]
ifr.excess.w <- ifr.excess[13:21]
logit.ifr.excess.w.var <- logit.ifr.excess.var[13:21]
pdf(file="Figure 1.pdf",
width=8.5/2.54,height=12/2.54,pointsize=6)
par(mfrow=c(2,1),mar=c(3.25,3.75,1,0),oma=c(0,0,0.35,0),tcl=-0.35)
plot(age,ifr.covid.m,type="n",xlim=c(0,90),ylim=c(0,0.22),xlab="",ylab="",axes=F)
polygon(c(age,rev(age)),
c( 1/(1+exp(-(log(ifr.covid.m/(1-ifr.covid.m))-qt(0.975,1500-183)*sqrt(logit.ifr.covid.m.var)))),
rev(1/(1+exp(-(log(ifr.covid.m/(1-ifr.covid.m))+qt(0.975,1500-183)*sqrt(logit.ifr.covid.m.var)))))),
density=-1,col=adjustcolor("deepskyblue2",alpha.f=0.25),border=NA)
polygon(c(age,rev(age)),
c( 1/(1+exp(-(log(ifr.covid.w/(1-ifr.covid.w))-qt(0.975,1500-183)*sqrt(logit.ifr.covid.w.var)))),
rev(1/(1+exp(-(log(ifr.covid.w/(1-ifr.covid.w))+qt(0.975,1500-183)*sqrt(logit.ifr.covid.w.var)))))),
density=-1,col=adjustcolor("red3",alpha.f=0.25),border=NA)
lines(age,ifr.covid.m,lty=1,lwd=2,col="deepskyblue2")
lines(age,ifr.covid.w,lty=1,lwd=2,col="red3")
axis(1,at=seq(0,90,10),labels=seq(0,90,10),cex.axis=1,font.axis=1,mgp=c(3,0.50,0),lty=1,lwd=1)
axis(2,at=seq(0,0.22,0.02),labels=seq(0,22,2),cex.axis=1,font.axis=1,mgp=c(3,0.65,0),las=1,adj=1,lty=1,lwd=1)
mtext(side=1,"Age (years)",cex=1,font=1,line=1.75)
mtext(side=2,"Infection fatality risk for SARS-CoV-2 (%)",cex=1,font=1,las=3,line=2.25)
mtext(side=2,at=0.235,"A",cex=1.25,font=1,las=1,line=2.35)
mtext(side=2,at=0.235,"Deaths with confirmed COVID-19",cex=1,font=1,las=1,adj=0,line=0)
legend(0,0.22,inset=0,bty="n",legend=c("Men","Women"),
lty=1,lwd=2,col=c("deepskyblue2","red3"),cex=1,y.intersp=1.25)
plot(age,ifr.excess.m,type="n",xlim=c(0,90),ylim=c(0,0.22),xlab="",ylab="",axes=F)
polygon(c(age,rev(age)),
c( 1/(1+exp(-(log(ifr.excess.m/(1-ifr.excess.m))-qt(0.975,1500-183)*sqrt(logit.ifr.excess.m.var)))),
rev(1/(1+exp(-(log(ifr.excess.m/(1-ifr.excess.m))+qt(0.975,1500-183)*sqrt(logit.ifr.excess.m.var)))))),
density=-1,col=adjustcolor("deepskyblue2",alpha.f=0.25),border=NA)
polygon(c(age,rev(age)),
c( 1/(1+exp(-(log(ifr.excess.w/(1-ifr.excess.w))-qt(0.975,1500-183)*sqrt(logit.ifr.excess.w.var)))),
rev(1/(1+exp(-(log(ifr.excess.w/(1-ifr.excess.w))+qt(0.975,1500-183)*sqrt(logit.ifr.excess.w.var)))))),
density=-1,col=adjustcolor("red3",alpha.f=0.25),border=NA)
lines(age,ifr.excess.m,lty=1,lwd=2,col="deepskyblue2")
lines(age,ifr.excess.w,lty=1,lwd=2,col="red3")
axis(1,at=seq(0,90,10),labels=seq(0,90,10),cex.axis=1,font.axis=1,mgp=c(3,0.50,0),lty=1,lwd=1)
axis(2,at=seq(0,0.22,0.02),labels=seq(0,22,2),cex.axis=1,font.axis=1,mgp=c(3,0.65,0),las=1,adj=1,lty=1,lwd=1)
mtext(side=1,"Age (years)",cex=1,font=1,line=1.75)
mtext(side=2,"Infection fatality risk for SARS-CoV-2 (%)",cex=1,font=1,las=3,line=2.25)
mtext(side=2,at=0.235,"B",cex=1.25,font=1,las=1,line=2.35)
mtext(side=2,at=0.235,"Excess deaths from all causes",cex=1,font=1,las=1,adj=0,line=0)
dev.off()