# R FUNCTIONS FOR META-ANALYSIS (2018, JANUARY)

# Citation: Tarlow, K. R. (2018, January). R functions for
#           meta-analysis (R code). Retrieved from 
#           http://ktarlow.com/stats/

# R SYNTAX COPYRIGHT (C) 2018 KEVIN R. TARLOW
# url: http://www.ktarlow.com/stats
# email: krtarlow@gmail.com

# adapted from: Borenstein, M., Hedges, L. V., Higgins, 
#               J. P. T., & Rothstein, H. R. (2009). 
#               Introduction to meta-analysis. John Wiley
#               & Sons. 

# This work is licensed under the Creative Commons 
# Attribution-NonCommercial 3.0 Unported License. 
# To view a copy of this license, visit 
# http://creativecommons.org/licenses/by-nc/3.0/deed.en_US
# 
# You are free to copy, distribute, transmit, and adapt
# the work under the following conditions:
#
# Attribution - You must attribute the work in the manner 
# specified by the author, KEVIN R. TARLOW (but not in any way 
# that suggests that the author endorses you or your use 
# of the work).
#
# Noncommercial  You may not use this work for commercial 
# purposes.

remeta <- function(y,vy) {

# random-effects meta-analysis
# adapted from Borenstein et al. (2009). Introduction to Meta-Analysis

# arguments: y = effect sizes
#            vy = variance of effect sizes

	if(length(y) != length(vy)) {
		stop("y and vy must be equal lengths")
	}

	k <- length(y)	# k = number of studies
	df <- k - 1		# degrees of freedom

	w.fe <- 1 / vy	# fixed-effects weights

	Q <- sum(w.fe * y^2) - (sum(w.fe * y)^2 / sum(w.fe))
	pQ <- 1-pchisq(Q,df=df)   # pQ = prob(Q)

	C <- sum(w.fe) - (sum(w.fe^2)/sum(w.fe))

	Tsq <- (Q - df) / C
	if (is.nan(Tsq)) {   # NaN result with 1 study (0 / 0)
		Tsq <- 0 
	} else if (Tsq < 0) {
		Tsq <- 0
	}

	Isq <- (Q - df) / Q
	if (is.nan(Isq)) {   # NaN result with 1 study (0 / 0)
		Isq <- 0
	} else if (Isq < 0) {
		Isq <- 0
	}

	v <- vy + Tsq	# total mean = var within (vy) + var between (Tsq)
	w <- 1 / v		# random-effects weights
	wy <- w*y
	wysq <- w*(y^2)
	wsq <- w^2

	mean <- sum(wy) / sum(w) 	# random-effects weighted mean
	var <- 1 / sum(w) 		# variance of weighted mean
	se <- sqrt(var) 			# standard error of weighted mean
	ll <- mean - (1.96 * se)
	ul <- mean + (1.96 * se)
	z <- mean / se
	p <- 2 * pnorm(-abs(z))

	return(list(
		mean=mean,
		var=var,
		se=se,
		ll=ll,
		ul=ul,
		z=z,
		p=p,
		Q=Q,
		df=df,
		pQ=pQ,
		Tsq=Tsq,
		Isq=Isq,
		C=C,
		v=v,
		w=w
	))

}

remod <- function(y,vy,g,pool=TRUE) {

# random effects meta-analysis with categorical moderator
# see Borenstein et al. (2009) Chapter 19: Subgroup analyses. 

# arguments: y = vector of effect sizes
#            vy = vector of ES variances
#            g = vector of grouping variables (numerical or character)
#            pool = logical (T/F) indicating whether or not to pool variances for
#                   Tau-sq estimate (default is TRUE)

	group <- factor(g)
	k <- length(levels(group))

	# summary statistics data frame
	resummary <- data.frame(
		mean=numeric(length=k+1), 	# random-effects mean
		v=numeric(length=k+1), 		# variance of mean
		se=numeric(length=k+1),		# standard error of mean
		ll=numeric(length=k+1),		# 95% CI lower limit
		ul=numeric(length=k+1),		# 95% CI upper limit
		z=numeric(length=k+1),		# Z-statistic
		p=numeric(length=k+1),		# p value
		Q=numeric(length=k+1)		# Q* (for between-groups anova only)
	)
	row.names(resummary) <- c(levels(group),"Combined")

	# fixed-effect anova data frame (for within- and total variance)
	feanova <- data.frame(
		Q=numeric(length=k+3), 
		df=numeric(k+3), 
		p=numeric(length=k+3) 
	)
	row.names(feanova) <- c(levels(group),"Within","Between","Total")

	# random-effect anova data frame (for between-groups variance)
	reanova <- data.frame(
		Q=numeric(length=k+3), 
		df=numeric(k+3), 
		p=numeric(length=k+3) 
	)
	row.names(reanova) <- c(levels(group),"Within","Between","Total")
	
	# initialize R-squared, proportion of variance explained by moderator
	Rsq <- NA

	sum.C <- 0   # for pooled variance computations

	# computations for fixed-effect anova (for within- and total variance)
	for (i in 1:k) {
		
		ygroup <- y[g==levels(group)[i]]
		vygroup <- vy[g==levels(group)[i]]
		meta <- remeta(ygroup,vygroup)

		feanova[i,"Q"] <- meta$Q
		feanova[i,"df"] <- meta$df
		feanova[i,"p"] <- meta$pQ

		# blank out rows for groups in random-effect model anova table
		reanova[i,"df"] <- NA
		reanova[i,"p"] <- NA

		sum.C <- sum.C + meta$C   # for pooled variance computations

	}

	# sum within group Q and df for fixed-effect model
	# see Borenstein et al. (2009) Ch. 19, p. 158, Table 19.3
	feanova["Within","Q"] <- sum(feanova[,"Q"],na.rm=TRUE)
	feanova["Within","df"] <- sum(feanova["df"],na.rm=TRUE)
	feanova["Within","p"] <- 1-pchisq(feanova[k+1,"Q"],df=feanova[k+1,"df"])

	# get total Q and df for fixed-effect model
	meta <- remeta(y,vy)
	feanova["Total","Q"] <- meta$Q
	feanova["Total","df"] <- meta$df
	feanova["Total","p"] <- meta$pQ

	# blank out row for between-groups variance (refer to random-effect model anova)
	feanova["Between","Q"] <- feanova["Total","Q"] - feanova["Within","Q"]
	feanova["Between","df"] <- k - 1
	feanova["Between","p"] <- 1-pchisq(feanova["Between","Q"],df=feanova["Between","df"])

	# blank out row for within-groups variance in random-effect model anova table
	reanova["Within","df"] <- NA
	reanova["Within","p"] <- NA

	# blank out row for total variance in random-effect model anova table
	reanova["Total","df"] <- NA
	reanova["Total","p"] <- NA
	
	if (pool) {

		# pooled estimate of T^2
		# see Borenstein et al. (2009) Ch. 19, p. 172

		Tsq.within <- (feanova["Within","Q"] - feanova["Within","df"]) / sum.C
		if (Tsq.within < 0 ) {
			Tsq.within <- 0
		}
		
		v <- vy + Tsq.within 	# total variance (v) using pooled Tsq
		w <- 1 / v
		
		sum.Q <- 0

		for (i in 1:k) {

			ygroup <- y[g==levels(group)[i]]
			vgroup <- v[g==levels(group)[i]]
			wgroup <- w[g==levels(group)[i]]
			
			resummary[i,"mean"] <- sum(wgroup * ygroup) / sum(wgroup)
			resummary[i,"v"] <- 1 / sum(wgroup)
			resummary[i,"se"] <- sqrt(resummary[i,"v"])
			resummary[i,"ll"] <- resummary[i,"mean"] - (1.96 * resummary[i,"se"])
			resummary[i,"ul"] <- resummary[i,"mean"] + (1.96 * resummary[i,"se"])
			resummary[i,"z"] <- resummary[i,"mean"] / resummary[i,"se"]
			resummary[i,"p"] <- 2 * pnorm(-abs(resummary[i,"z"]))
			resummary[i,"Q"] <- sum(wgroup * (ygroup^2)) - (sum(wgroup * ygroup)^2 / sum(wgroup))
			
			reanova[i,"Q"] <- resummary[i,"Q"]
			
			# add summed computations for combined effect
			sum.Q <- sum.Q + resummary[i,"Q"]

		}
		
		resummary["Combined","mean"] <- sum(w*y) / sum(w)
		resummary["Combined","v"] <- 1 / sum(w)
		resummary["Combined","se"] <- sqrt(resummary["Combined","v"])
		resummary["Combined","ll"] <- resummary["Combined","mean"] - (1.96 * resummary["Combined","se"])
		resummary["Combined","ul"] <- resummary["Combined","mean"] + (1.96 * resummary["Combined","se"])
		resummary["Combined","z"] <- resummary["Combined","mean"] / resummary["Combined","se"]
		resummary["Combined","p"] <- 2 * pnorm(-abs(resummary["Combined","z"]))
		resummary["Combined","Q"] <- sum(w * (y^2)) - (sum(w*y)^2 / sum(w))
		
		reanova["Total","Q"] <- resummary["Combined","Q"]
		
		# Q-test based on anova, with pooled estimate of T^2
		# *** between-groups row ***
		# see Borenstein et al. (2009) Ch. 19, p. 177
		
		reanova["Within","Q"] <- sum.Q
		reanova["Between","Q"] <- reanova["Total","Q"] - reanova["Within","Q"]
		reanova["Between","df"] <- k - 1
		reanova["Between","p"] <- 1-pchisq(reanova["Between","Q"],df=reanova["Between","df"])
		
		# R-squared, proportion of variance explained by moderator
		# only for pooled estimate of T^2
		# see Borenstein et al. (2009) Ch. 19, p. 179
		
		Rsq <- 1 - (Tsq.within / remeta(y,vy)$Tsq)

	} else {

		# separate estimates of T^2 (not pooled)
		# see Borenstein et al. (2009) Ch. 19, p. 165
		
		sum.w <- 0
		sum.wy <- 0
		sum.wysq <- 0
		sum.Q <- 0

		for (i in 1:k) {

			ygroup <- y[g==levels(group)[i]]
			vygroup <- vy[g==levels(group)[i]]
			meta <- remeta(ygroup,vygroup)

			resummary[i,"mean"] <- sum(meta$w * ygroup) / sum(meta$w)
			resummary[i,"v"] <- 1 / sum(meta$w)
			resummary[i,"se"] <- sqrt(resummary[i,"v"])
			resummary[i,"ll"] <- resummary[i,"mean"] - (1.96 * resummary[i,"se"])
			resummary[i,"ul"] <- resummary[i,"mean"] + (1.96 * resummary[i,"se"])
			resummary[i,"z"] <- resummary[i,"mean"] / resummary[i,"se"]
			resummary[i,"p"] <- 2 * pnorm(-abs(resummary[i,"z"]))
			resummary[i,"Q"] <- sum(meta$w * (ygroup^2)) - (sum(meta$w * ygroup)^2 / sum(meta$w))

			reanova[i,"Q"] <- resummary[i,"Q"]

			# add summed computations for combined effect
			sum.w <- sum.w + sum(meta$w)
			sum.wy <- sum.wy + sum(meta$w * ygroup)
			sum.wysq <- sum.wysq + sum(meta$w * (ygroup^2))
			sum.Q <- sum.Q + resummary[i,"Q"]

		}

		resummary["Combined","mean"] <- sum.wy / sum.w
		resummary["Combined","v"] <- 1 / sum.w
		resummary["Combined","se"] <- sqrt(resummary["Combined","v"])
		resummary["Combined","ll"] <- resummary["Combined","mean"] - (1.96 * resummary["Combined","se"])
		resummary["Combined","ul"] <- resummary["Combined","mean"] + (1.96 * resummary["Combined","se"])
		resummary["Combined","z"] <- resummary["Combined","mean"] / resummary["Combined","se"]
		resummary["Combined","p"] <- 2 * pnorm(-abs(resummary["Combined","z"]))
		resummary["Combined","Q"] <- sum.wysq - (sum.wy^2 / sum.w)

		reanova["Total","Q"] <- resummary["Combined","Q"]

		# Q-test based on anova, with separate estimates of T^2
		# *** between-groups row ***
		# see Borenstein et al. (2009) Ch. 19, p. 169

		reanova["Within","Q"] <- sum.Q
		reanova["Between","Q"] <- reanova["Total","Q"] - reanova["Within","Q"]
		reanova["Between","df"] <- k - 1
		reanova["Between","p"] <- 1-pchisq(reanova["Between","Q"],df=reanova["Between","df"])

	}

	if(pool) {
		cat("\nSubgroup Analyses: Pooled estimate of Tau-sq\n")
	} else {
		cat("\nSubgroup Analyses: Separate estimates of Tau-sq\n")
	}

	cat("\nRandom-effects model summary statistics\n")
	print(round(resummary,4))

	cat("\nFixed-effects model ANOVA table (within-groups and total variance)\n")
	print(round(feanova,4))
	
	cat("\nRandom-effects model ANOVA table (between-groups variance)\n")
	print(round(reanova,4))

	# R-squared estimate (only for pooled T^2)
	# see Borenstein et al. (2009) Ch. 19, p. 179
	if (pool) {
		cat("\nR-squared:",round(Rsq,4),"\n")
	}

	cat("\n")

	invisible(list(summary=resummary, feanova=feanova, reanova=reanova, Rsq=Rsq))

}