plot.ns.ci.prg <- function(data, variable, fit, pos, df, knots, xref, typeout = "RR", step = 0.1, xtitle = upperCase(variable),ytitle = upperCase(typeout), level=0.95, na.action = T, col=1,lty="l",lwd=1,linkf="log") { ##################################################################################################### # # The function plot.ns.ci.prg produces a publication-quality graph of RR, logRR, or # MPC and the associated 95% confidence limits for an independent variable with respect to a # specified reference value xref . # # This function has 12 arguments which are described as follows: # # data: Original S-Plus data set; # variable: Name of the predictor variable; # fit: The fitted object from the generalized linear model; # pos: the position where the first coefficient of the predictor variable begins in GLM summary; # df : number of degrees of freedom (df) for the predictor variable. If no specified knots are # given, the program will select the df + 1 knots (including boundary knots) at suitably # chosen quantiles of the predictor variable; # knots: knots that define the natural spline, including boundary points. When knots and # df are both given, the number of knots is equal to df + 1; # xref : the reference predictor value; # typeout: You can choose "LOGRR", "RR", or "MPC" to output log Relative Risk, the # Relative Risk, or the Mean Percent Change, respectively. The default is "RR" # step: the step length to generate a vector of predictor values to get their RR, or logRR, # or MPC as compared to xref . The smaller the step, the more continuous the graph # looks, but the longer it will take to run the program. The default is 0.1; # xtitle: the title of the x-axis. The default is the predictor variable (in uppercase); # ytitle: the title of the y-axis. The default is the argument typeout (in uppercase); # level: The level for the confidence interval # na.action: This parameter should be set to na.exclude if the original regression model excluded # observations that were flagged as missing by na.exclude. The default is na.exclude, which means to # exclude missing values from the computations. If na.action=na.fail, then na.fail will be applied, # which causes an error if missing values are present. # col: The color of lines # lty: The types of lines # lwd: The width of lines # linkf: The link function used in the model ######################################################################################################## data = na.action(data) # Get the variable column from data variable.vec <- data[, variable] # Get the range of variable variable.vec.range <- range(variable.vec, na.rm = TRUE) # if xref is out of range, a warning is given if((xref < variable.vec.range[1]) | (xref > variable.vec.range[2])) { warning(paste("the reference value xref is not in the range of data variable")) } # Generate a sequence of points for which the Log RR as compared to the reference point xref will be calculated. xvec = seq(variable.vec.range[1], variable.vec.range[2], by = step) # if both df and knots are given, then the number of knots should be equal to df+1, otherwise # a warning will be given. if(!missing(df) & !missing(knots)) { if((df + 1) != length(knots)) { warning(paste("df+1 is not equal to the number of knots")) } } # if knots is given instead of df, then df is equal to the number of knots - 1 if(missing(df) & (!missing(knots))) { df <- length(knots) - 1 } # if df is given instead of knots, then the program will select the df + 1 knots (including boundary knots) at suitably # chosen quantiles of the predictor variable; if(!missing(df) & (missing(knots))) { knots <- quantile(variable.vec, seq(0, 1, length = (df + 1)), na.rm = TRUE) } # if either df or knots is given, a warning is given. if(missing(df) & missing(knots)) { warning(paste("please specify df or knots")) } # get the begining position of basis functions begin.pos <- pos # get the end position of basis functions end.pos <- pos + df - 1 # get the estimated coefficients coeff <- summary(fit)$coefficients[begin.pos:end.pos, 1] # get the correlation matrix of the estimated coefficients corr <- summary(fit)$correlation[begin.pos:end.pos, begin.pos:end.pos] # get the standard deviations of the estimated coefficients std <- summary(fit)$coefficients[begin.pos:end.pos, 2] # compute the covariance matrix of estimated coefficients of basis # natural splines. var <- matrix(0, df, df) for(i in 1:df) { for(j in i:df) { var[i, j] <- std[i] * std[j] * corr[i, j] } } for(i in 1:df) { for(j in 1:(i - 1)) { var[i, j] <- var[j, i] } } # evaluate the basis functions at the reference point xref basis0 <- ns(xref, knots = knots, Boundary.knots = TRUE) # extract the length of the vector needed to save logRR. nx <- length(xvec) # Initialize the vectors of the log RR and lower and upper bound of confidence interval of log RR logrr <- rep(0, nx) lower.logrr <- rep(0, nx) upper.logrr <- rep(0, nx) # calculate the log RR and confidence interval for each point for(i in 1:nx) { # evaluate the basis functions at the proposed point x basis1 <- ns(xvec[i], knots = knots, Boundary.knots = TRUE) # evaluate the difference of the bases functions at the # proposed point x from the reference point xref diff <- basis1 - basis0 # evaluate the variance of log rate ratio varlogrr <- t(diff) %*% var %*% diff # evaluate the log rate ratio logrr[i] <- t(diff) %*% coeff # calculate the lower and upper bound of confidence interval of log RR normstat <- qnorm(1-(1-level)/2) lower.logrr[i] <- logrr[i] - normstat * sqrt(varlogrr) upper.logrr[i] <- logrr[i] + normstat * sqrt(varlogrr) } if (upperCase(linkf)=="LOG"){ # output the graph of mean percent change and the associate confidence interval if typeout is not "RR" or "LOGRR" # output the graph of log relative risk and the associate confidence interval if typeout = "LOGRR" # output the graph of relative risk and the associate confidence interval if typeout = "RR" if(upperCase(typeout) == "RR") { plot(xvec, exp(logrr), ylim = c(exp(min(lower.logrr)), exp(max(upper.logrr))), type = lty, xlab = xtitle, ylab = ytitle, col=col,lwd=lwd) lines(xvec, exp(lower.logrr), type = lty,lwd=lwd) lines(xvec, exp(upper.logrr), type = lty,lwd=lwd) abline(h = 1) abline(v = xref) } else if(upperCase(typeout) == "LOGRR") { plot(xvec, logrr, ylim = c(min(lower.logrr), max(upper.logrr)), type = lty, xlab = xtitle, ylab = ytitle, col=col,lwd=lwd) lines(xvec, lower.logrr, type = lty,lwd=lwd) lines(xvec, upper.logrr, type = lty,lwd=lwd) abline(h = 0) abline(v = xref) } else { plot(xvec, (exp(logrr) - 1) * 100, ylim = c((exp(min(lower.logrr)) - 1) * 100, (exp(max(upper.logrr)) - 1) * 100), type = lty, xlab = xtitle, ylab = ytitle, , col=col,lwd=lwd) lines(xvec, (exp(lower.logrr) - 1) * 100, type = lty,lwd=lwd) lines(xvec, (exp(upper.logrr) - 1) * 100, type = lty,lwd=lwd) abline(h = 0) abline(v = xref) } } else { plot(xvec, logrr, ylim = c(min(lower.logrr), max(upper.logrr)), type = lty, xlab = xtitle, ylab = ytitle, col=col,lwd=lwd) lines(xvec, lower.logrr, type = lty,lwd=lwd) lines(xvec, upper.logrr, type = lty,lwd=lwd) abline(h = 0) abline(v = xref) } }