| Type: | Package | 
| Title: | Data and Code to Accompany Generalized Linear Models, 2nd Edition | 
| Version: | 0.1.0 | 
| Description: | Contains all the data and functions used in Generalized Linear Models, 2nd edition, by Jeff Gill and Michelle Torres. Examples to create all models, tables, and plots are included for each data set. | 
| License: | GPL (≥ 3) | 
| Depends: | R (≥ 2.10) | 
| Encoding: | UTF-8 | 
| LazyData: | true | 
| RoxygenNote: | 6.1.1 | 
| Imports: | MASS, pBrackets, nnet, effects, AER, pscl, foreign, Matrix, lme4, lmtest, sandwich, censReg, plm | 
| NeedsCompilation: | no | 
| Packaged: | 2019-07-19 01:05:52 UTC; michelletorres | 
| Author: | Jeff Gill [aut], Michelle Torres [aut, cre], Simon Heuberger [aut] | 
| Maintainer: | Michelle Torres <smtorres@wustl.edu> | 
| Repository: | CRAN | 
| Date/Publication: | 2019-07-19 09:40:05 UTC | 
Data on inflation increase in Africa
Description
Data for the Africa example used in chapter 7
Usage
data(africa)
Format
A data frame with 47 rows and 7 variables:
- INFLATN
- Inflation rates 
- DICTATOR
- Number of years of personal dictatorship that occurred from independence to 1989 
- SIZE
- Area at the end of the period, in thousand square kilometers 
- GROWTH
- Average annual gross national product (GNP) rate of growth in percent from 1965 to 1989 
- CHURMED
- Number of church-operated hospitals and medical clinics as of 1973 
- CONSTIT
- the constitutional structure when not a dictatorship in ascending centrality (0 = monarchy, 1 = presidential, 2 = presidential/parliamentary mix, 3 = parliamentary) 
- REPRESS
- Violence and threats of violence by the government against opposition political activity from 1990 to 1994 
...
Examples
data(africa)
attach(africa)
library(lmtest)
library(plm)
## Table 7.4
y <- (INFLATN/100)[-16]
y[y > 1] <- 1
X <- cbind(DICTATOR,SIZE,GROWTH,CHURMED,CONSTIT,REPRESS)[-16,]
X[,4] <- log(X[,4]+.01)
africa.glm <- glm(y ~ X[,1:6], family=quasibinomial('logit'))
out.mat <- coeftest(africa.glm, vcov.=vcovHC(africa.glm, type="HC0"))
( out.mat <- round(cbind(out.mat[,1:2], out.mat[,1] - 1.96*out.mat[,2],
                         out.mat[,2] + 1.96*out.mat[,2]),4) )
Data on campaign contributions in California in the 2014 House elections
Description
Data for the campaign contributions example used in chapter 6
Usage
data(campaign)
Format
A data frame with 180 rows and 16 variables:
- DTRCT
- California district 
- CANDID
- FEC ID 
- CYCLE
- Election cycle 
- NAME
- Name of the candidate 
- INCUMCHALL
- Incumbency status 
- CFSCORE
- CFscore 
- CANDGENDER
- Gender of the candidate 
- PARTY
- Party of the candidate 
- TOTCONTR
- Contributions to the 2014 electoral campaigns in the 53 districts of California in the U.S. House of Representatives 
- TOTPOP
- Total state population 
- FEMALE
- Number of female citizens in the state 
- WHITE
- Number of white citizens in the state 
- HISP
- Number of Hispanic citizens in the state 
- FEMALEPCT
- Percentage of female citizens in the state 
- WHITEPCT
- Percentage of white citizens in the state 
- HISPPCT
- Percentage of Hispanic citizens in the state 
...
Examples
data(campaign)
attach(campaign)
library(pBrackets)
## Gamma model
cmpgn.out <- glm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT,
             family=Gamma(link = 'log'), data=campaign)
## Linear model
cmpgn.out_lm <- lm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT, data=campaign)
## Table 6.8
round(glm.summary(cmpgn.out),4)
cmpgn.out$null.deviance
cmpgn.out$df.null
cmpgn.out$deviance
cmpgn.out$df.residual
logLik(cmpgn.out)
cmpgn.out$aic
## Table 6.9
summary(cmpgn.out_lm)
confint(cmpgn.out_lm)
## Figure 6.4
opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
par(mar=c(4,3,3,0),oma=c(1,1,1,1))
hist(campaign$TOTCONTR,xlab="",ylab="", yaxt="n", xaxt="n",
     xlim=c(0,9000), ylim=c(0, 130), main="",
     col = "gray40")
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = 'Total campaign contributions (thousands of dollars)',
      ylab= "Frequency",
      line = 1.7, cex.lab=1)
title(line = 1, main="Distribution of campaign contributions", font.main=1)
par(opar)
## Figure 6.5
campaign.mu <- predict(cmpgn.out_lm)
campaign.y <- campaign$TOTCONTR
par(mfrow=c(1,3), mar=c(3,3,2,1),oma=c(1,1,1,1))
plot(campaign.mu,campaign.y,xlab="",ylab="", yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Fitted values", ylab="Observed values",
      line = 1.7, cex.lab=1.3)
title(main="Model Fit Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(lm(campaign.y~campaign.mu)$coefficients, lwd=2)
plot(fitted(cmpgn.out_lm),resid(cmpgn.out_lm,type="pearson"),xlab="",ylab="",
     yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Fitted values", ylab="Pearson Residuals",
      line = 1.7, cex.lab=1.3)
title(main="Residual Dependence Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(0,0, lwd=2)
plot(cmpgn.out_lm,which=2, pch="+",
     sub.caption = "", caption = "", mgp=c(1.5, 0.3, 0),
     tck=0.02, cex.axis=0.9, cex.lab=1.3, lty=1)
title(main="Normal-Quantile Plot",
      line = 1, cex.main=1.7, font.main=1)
par(opar)
## Figure 6.6
mygray = rgb(153, 153, 153, alpha = 200, maxColorValue = 255)
newdat_gender <- data.frame(CANDGENDER = c('F','M'), PARTY= rep('Democrat',2),
                            INCUMCHALL=rep("C", 2), HISPPCT=rep(mean(campaign$HISPPCT),2))
newdat_party <- data.frame(CANDGENDER = rep('M', 3), PARTY= c('Democrat','Republican',
                           'Independent'), INCUMCHALL=rep("C", 3),
                           HISPPCT=rep(mean(campaign$HISPPCT),3))
newdat_incumchall <- data.frame(CANDGENDER = rep('M', 3), PARTY= rep('Democrat',3),
                                INCUMCHALL=c('C', 'I', 'O'),
                                HISPPCT=rep(mean(campaign$HISPPCT),3))
newdat_hisiq <- data.frame(CANDGENDER = rep('M', 2), PARTY= rep('Democrat',2),
                           INCUMCHALL=rep("C", 2),
                           HISPPCT=as.numeric(summary(campaign$HISPPCT)[c(2,5)]))
newdat_hispf <- data.frame(CANDGENDER = rep('M', 200), PARTY= rep('Democrat',200),
                           INCUMCHALL=rep("C", 200), HISPPCT=seq(.1, .9, length.out = 200))
preds_gender <- predict(cmpgn.out, newdata = newdat_gender, se.fit = TRUE)
preds_party <- predict(cmpgn.out, newdata = newdat_party, se.fit = TRUE)
preds_incumchall <- predict(cmpgn.out, newdata = newdat_incumchall, se.fit = TRUE)
preds_hispiq <- predict(cmpgn.out, newdata = newdat_hisiq, se.fit = TRUE)
preds_hispf <- predict(cmpgn.out, newdata = newdat_hispf, se.fit = TRUE)
cis_gender <- round(glm.cis(preds_gender$fit, preds_gender$se.fit, 0.95,cmpgn.out$df.residual),4)
cis_party <- round(glm.cis(preds_party$fit, preds_party$se.fit, 0.95,cmpgn.out$df.residual),4)
cis_incumchall <- round(glm.cis(preds_incumchall$fit, preds_incumchall$se.fit, 0.95,
                                cmpgn.out$df.residual),4)
cis_hispiq <- round(glm.cis(preds_hispiq$fit, preds_hispiq$se.fit, 0.95,cmpgn.out$df.residual),4)
cis_hispf <- round(glm.cis(preds_hispf$fit, preds_hispf$se.fit, 0.95,cmpgn.out$df.residual),4)
iqrange = cbind(summary(campaign$HISPPCT)[c(2,5)],cis_hispiq[2,4] - cis_hispf[1,4])
par(mfrow=c(2,2), mar=c(4,3,3,0),oma=c(1,1,1,1))
plot(1:2, cis_gender[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(0,3), ylim=c(100, 700))
segments(1:2, cis_gender[,5], 1:2, cis_gender[,6], lwd=2, col="gray60")
points(1:2, cis_gender[,4], pch=16, cex=2.5)
text(1:2, cis_gender[,4], labels = c("F", "M"), col="white", cex=0.9)
segments(1.05, cis_gender[1,4], 1.95, cis_gender[2,4], lty=2)
brackets(1, cis_gender[1,4]+20, 2, cis_gender[1,4]+20, h = 45,  ticks = 0.5, lwd=2)
text(1.5, cis_gender[1,4]+100, bquote(hat(y)['F']-hat(y)['M'] ~ '='
     ~ .(cis_gender[2,4]-cis_gender[1,4])), cex=0.9)
axis(1, at=1:2, labels = c("Female", "Male"), tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0),
     lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Gender of candidate',
      ylab="Total campaign contributions",
      line = 1.7, cex.lab=1)
title(line = 1, main="Gender of candidate", font.main=3)
plot(1:3, cis_party[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(0.5,3.5), ylim=c(0, 600))
segments(1:3, cis_party[,5], 1:3, cis_party[,6], lwd=2, col="gray60")
points(1:3, cis_party[,4], pch=15:17, cex=2.5)
text(1:3, cis_party[,4], labels = c("D", "R", "I"), col="white", cex=0.8)
segments(c(1.05,2.05), cis_party[1:2,4], c(1.95,2.95), cis_party[2:3,4], lty=2)
brackets(1, cis_party[2,4]+20, 2, cis_party[2,4]+20, h = 45,  ticks = 0.5, lwd=2)
brackets(3, cis_party[3,4]+20, 2, cis_party[3,4]+20, h = 45,  ticks = 0.5, lwd=2)
text(1.5, cis_party[1,4]+160, bquote(hat(y)['R']-hat(y)['D'] ~ '='
     ~ .(cis_party[2,4]-cis_party[1,4])), cex=0.9)
text(2.5, cis_party[3,4]-40, bquote(hat(y)['I']-hat(y)['R'] ~ '='
     ~ .(cis_party[3,4]-cis_party[2,4])), cex=0.9)
axis(1, at=1:3, labels = c("Democrat", "Republican", "Independent"), tck=0.03, cex.axis=0.9,
     mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Party of candidate',
      ylab="Total campaign contributions",
      line = 1.7, cex.lab=1)
title(line = 1, main="Party of candidate", font.main=3)
plot(1:3, cis_incumchall[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(0.5,3.5), ylim=c(0, 3200))
segments(1:3, cis_incumchall[,5], 1:3, cis_incumchall[,6], lwd=2, col="gray60")
points(1:3, cis_incumchall[,4], pch=15:17, cex=1.5)
segments(c(1.05,2.05), cis_incumchall[1:2,4], c(1.95,2.95), cis_incumchall[2:3,4], lty=2)
brackets(1, cis_incumchall[2,4]+20, 2, cis_incumchall[2,4]+20, h = 105,  ticks = 0.5, lwd=2)
brackets(3, cis_incumchall[3,4]+20, 2, cis_incumchall[3,4]+20, h = 105,  ticks = 0.5, lwd=2)
text(1.5, cis_incumchall[2,4]+285, bquote(hat(y)['I']-hat(y)['C'] ~ '='
     ~ .(cis_incumchall[2,4]-cis_incumchall[1,4])), cex=0.9)
text(2.5, cis_incumchall[3,4]-200, bquote(hat(y)['O']-hat(y)['I'] ~ '='
     ~ .(cis_incumchall[3,4]-cis_incumchall[2,4])), cex=0.9)
axis(1, at=1:3, labels = c("Challenger", "Incumbent", "Open seat"), tck=0.03, cex.axis=0.9,
     mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Status of candidate',
      ylab="Total campaign contributions",
      line = 1.7, cex.lab=1)
title(line = 1, main="Status of candidate", font.main=3)
plot(newdat_hispf$HISPPCT, cis_hispf[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     ylim = c(0,1100))
polygon(x = c(newdat_hispf$HISPPCT, rev(newdat_hispf$HISPPCT)),
        y = c(cis_hispf[,5], rev(cis_hispf[,6])), col = mygray, border = NA)
lines(newdat_hispf$HISPPCT, cis_hispf[,4], lwd=2)
rug(campaign$HISPPCT)
segments(iqrange[,1], cis_hispiq[,4], iqrange[,1], rep(500,2), lty=2)
segments(iqrange[1,1], 500, iqrange[2,1], 500, lty = 2)
brackets(iqrange[1,1], 510, iqrange[2,1], 510, h = 75,  ticks = 0.5, lwd=2)
text(abs((iqrange[2,1]-iqrange[1,1])/2)+iqrange[1,1], 450, 'Interquartile range', cex=0.8)
text(iqrange[,1], cis_hispiq[,4]-50, round(iqrange[,1],3), cex=0.8)
text(abs((iqrange[2,1]-iqrange[1,1])/2)+iqrange[1,1], 655,
          labels=bquote(hat(y)['Q3']-hat(y)['Q1'] ~ '=' ~ .(iqrange[1,2])), cex=0.9)
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = '% Hispanic population in Congressional District',
      ylab="Total campaign contributions",
      line = 1.7, cex.lab=1)
title(line = 1, main="Hispanic constituency", font.main=3)
par(opar)
Data on bills assigned to House committees in the 103rd and 104th Houses of Representatives
Description
Data for the committees example used in chapters 6
Usage
data(committee)
Format
A data frame with 20 rows and 6 variables:
- SIZE
- Number of members on the committee 
- SUBS
- Number of subcommittees 
- STAFF
- Number of staff assigned to the committee 
- PRESTIGE
- Dummy variable indicating whether or not it is a high-prestige committee 
- BILLS103
- Number of bills in the 103rd House 
- BILLS104
- Number of bills in the 104th House 
...
Examples
data(committee)
attach(committee)
library(AER)
library(MASS)
library(pscl)
## Table 6.6
committee
## Table 6.7
committee.out <- glm.nb(BILLS104 ~ SIZE + SUBS * (log(STAFF)) + PRESTIGE + BILLS103)
summary.glm(committee.out)
data.frame(cbind(round(cbind(summary(committee.out)$coef[,1:2], confint(committee.out)),4)[,1],
       round(cbind(summary(committee.out)$coef[,1:2], confint(committee.out)),4)[,2],
       round(cbind(summary(committee.out)$coef[,1:2], confint(committee.out)),4)[,3],
       round(cbind(summary(committee.out)$coef[,1:2], confint(committee.out)),4)[,4]))
## Figure 6.3
z.matrix <- matrix(0,200,200)
for(i in 1:200)  {
        for(j in 1:200)  {
                if(j < 70)    z.matrix[i,j] <- 1
                if(j < 40)    z.matrix[i,j] <- 2
                if(j < 10)    z.matrix[i,j] <- 3
                if(j == 1)    z.matrix[i,j] <- 3.001
                if(j > 130)   z.matrix[i,j] <- 1
                if(j > 160)   z.matrix[i,j] <- 2
                if(j > 190)   z.matrix[i,j] <- 3
                if(j == 200)  z.matrix[i,j] <- 3.001
        }
}
pears <- resid(committee.out,type="pearson")
devs <- resid(committee.out,type="deviance")
x = seq(-2000,2000,length=200)
opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
layout(matrix(c(1,2), ncol = 1), heights = c(0.9,0.1))
par(mar=c(3,4,2,4),oma=c(2,2,1,3))
image(seq(0,51,length=200), seq(-2000,2000,length=200),z.matrix,xlim=c(0,51),ylim=c(-2000,2000),
	xaxt="n",yaxt="n",xlab="",ylab="", col=rev(c("white", "gray40", "gray60", "gray80")))
points(seq(1,50,length=20),(2000/3)*pears[order(BILLS104)],pch=15)
lines(seq(1,50,length=20),(2000/3)*devs[order(BILLS104)],type="h")
abline(0,0, lwd=2)
abline(h=c((x[10]+x[9])/2,(x[40]+x[39])/2,(x[70]+x[69])/2,(x[130]+x[131])/2,
           (x[160]+x[161])/2,(x[191]+x[190])/2), lty=2)
title(xlab = "Order of Fitted Outcome Variable", ylab="Residual Effect",
      line = 1.3, cex.lab=1.3)
title(main="Model Fit Plot",
      line = 1, cex.main=1.7, font.main=1)
par(mar=c(0,1.5,1,1))
plot(0,0, type="n", axes = FALSE, xlab = "", ylab = "")
legend("center", ncol = 2,
       legend = c("Pearson", "Deviances"),
       cex=1, lty=c(0,1), pch = c(15,NA))
par(opar)
Data on censored corruption scale
Description
Data for the corruption example used in chapter 7
Usage
data(corruption)
Format
A data frame with 83 rows and 7 variables:
- ticpi85b
- Country-level compilation of effects into a 0 to 10 scale of increasing government corruption with an adjustment that modifies this range slightly 
- MSO
- Binary variable indicating whether the government owns a majority of key industries 
- LOG.PC.GDP
- Log of the average per capita GDP from 1975 to 1983 
- DEMOCRACY
- Polity IV democracy score from 1975 to 1983 
- GOVGDT
- Average government spending as a percentage of GDP from 1980 to 1983 
- ECONFREE
- Index of the ability of capitalists to invest and move money 
- FEDERAL
- Binary variable indicating whether the government has a federal system during this period 
...
Examples
data(corruption)
attach(corruption)
library(censReg)
## Table 7.5
corruption.tobit.out <- censReg( ticpi85b ~ MSO + LOG.PC.GDP + DEMOCRACY + GOVGDT +
    ECONFREE*FEDERAL,
    left = 2.6, right = 10.70, data=corruption, start = NULL, nGHQ = 8, logLikOnly = FALSE)
summary(corruption.tobit.out)
summary(corruption.tobit.out)$nObs
Coefs <- summary(corruption.tobit.out)$estimate[,1]
SEs <- summary(corruption.tobit.out)$estimate[,2]
CI.low <- Coefs - SEs*1.96
CI.hi <- Coefs + SEs*1.96
( round(out.table <- cbind( Coefs, SEs, CI.low, CI.hi),4) )
Data on capital punishment
Description
Data for the capital punishment example used in chapters 4, 5, and 6
Usage
data(dp)
Format
A data frame with 17 rows and 7 variables:
- EXECUTIONS
- The number of times that capital punishment is implemented on a state level in the United States for the year 1997 
- INCOME
- Median per capita income in dollars 
- PERPOVERTY
- Percent of the population classified as living in poverty 
- PERBLACK
- Percent of Black citizens in the population 
- VC100k96
- Rate of violent crimes per 100,000 residents for the year before (1996) 
- SOUTH
- Dummy variable to indicate whether the state is in the South 
- PROPDEGREE
- Proportion of the population with a college degree 
...
Examples
opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
data(dp)
attach(dp)
## Table 4.2
dp
## Table 5.1
dp.out <- glm(EXECUTIONS ~ INCOME+PERPOVERTY+PERBLACK+log(VC100k96)+
              SOUTH+PROPDEGREE, family=poisson)
dp.cis <- round(glm.summary(dp.out, alpha = 0.05),4)
round(cbind(summary(dp.out)$coef[,1:2], dp.cis),4)
round(dp.out$null.deviance,4);round(dp.out$df.null,4)
round(dp.out$deviance,4);round(dp.out$df.residual,4)
round(logLik(dp.out),4)
round(dp.out$aic,4)
round(vcov(dp.out),4) # variance covariance matrix
## Table 5.2
k <- 200
b5 <- seq(0.1, 5.4,length=k)
w <- rep(0,k)
for(i in 1:k){
  mm <- glm(EXECUTIONS ~ INCOME+PERPOVERTY+PERBLACK+log(VC100k96)+PROPDEGREE,
            offset=b5[i]*SOUTH,family=poisson)
  w[i] <- logLik(mm)
  }
f <- function(b5,x,y,maxloglik){
  mm <- glm(EXECUTIONS ~ INCOME+PERPOVERTY+PERBLACK+log(VC100k96)+PROPDEGREE,
            offset=b5*x,family=poisson)
  logLik(mm) - maxloglik + qchisq(.95,1)/2
  }
low.pll <- uniroot(f,interval=c(1.5,2), x=SOUTH, y=EXECUTIONS, maxloglik=logLik(dp.out))$root
high.pll <- uniroot(f,interval=c(3,4), x=SOUTH, y=EXECUTIONS, maxloglik=logLik(dp.out))$root
w[which.min(abs(w-low.pll))]
round(c(low.pll, high.pll),4)
cbind(round(dp.cis[,3:4],4),
      round(confint(dp.out),4))
## Table 6.2
resp <- resid(dp.out,type="response")
pears <- resid(dp.out,type="pearson")
working <- resid(dp.out,type="working")
devs <- resid(dp.out,type="deviance")
dp.mat <- cbind(rep(1,nrow(dp)), as.matrix(dp[,2:4]), as.matrix(log(dp[,5])),
                as.matrix(dp[,6]), as.matrix(dp[,7]))
dp.resid.mat <- cbind(resp,pears,working,devs)
dimnames(dp.resid.mat)[[2]] <- c("response","pearson","working","deviance")
dimnames(dp.resid.mat)[[1]] <- rownames(dp)
dp.resid.mat2 <- round(dp.resid.mat,4)
resid.df <- data.frame(cbind(dp.resid.mat2[,1], dp.resid.mat2[,2],
      dp.resid.mat2[,3], dp.resid.mat2[,4]))
colnames(resid.df) <- dimnames(dp.resid.mat)[[2]]
resid.df
## Figure 5.1
dp.mat.0 <- cbind(dp.mat[,1:5],rep(0,length=nrow(dp.mat)),dp.mat[,7])
dimnames(dp.mat.0)[[2]] <- names(dp.out$coefficients)
dp.mat.1 <- cbind(dp.mat[,1:5],rep(1,length=nrow(dp.mat)),dp.mat[,7])
dimnames(dp.mat.1)[[2]] <- names(dp.out$coefficients)
tcks = list(seq(0,140,20), seq(0,12,2), seq(0,30,5), seq(0,10,2), seq(0,30,5))
layout(matrix(c(1,1,2,2,3,3,4,4,5,6,6,7,8,8,8,8), ncol=4, byrow = TRUE),
       heights = c(0.3,0.3,0.3,0.1))
par(mar=c(3,3,2,4),oma=c(2,1,1,3))
for (i in 2:(ncol(dp.mat.0)-1))  {
  j = i-1
  if (i==6){
    i <- i+1
    plot(0,0, type = "n", axes=FALSE, xlab = "", ylab="")
  }
  ruler <- seq(min(dp.mat.0[,i]),max(dp.mat.0[,i]),length=1000)
  xbeta0 <- exp(dp.out$coefficients[-i]%*%apply(dp.mat.0[,-i],2,mean)
                + dp.out$coefficients[i]*ruler)
  xbeta1 <- exp(dp.out$coefficients[-i]%*%apply(dp.mat.1[,-i],2,mean)
                + dp.out$coefficients[i]*ruler)
  plot(ruler,xbeta0,type="l", xlab="",ylab="", yaxt="n", xaxt="n",
       ylim=c(min(xbeta0,xbeta1)-2,max(xbeta0,xbeta1)), lwd=3)
  lines(ruler,xbeta1,lty=4, lwd=2)
  axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
  axis(2, at=tcks[[j]],
       tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
  title(xlab = paste("Levels of",dimnames(dp.mat.0)[[2]][i]), ylab="Expected executions",
        line = 1.7, cex.lab=1.2)
}
plot(0,0, type = "n", axes=FALSE, xlab = "", ylab="")
par(mar=c(0,1.5,1,1))
plot(0,0, type="n", axes = FALSE, xlab = "", ylab = "")
legend("center", ncol = 2,
       legend = c("South State", "Non-South State"),
       cex=1.1, lty=c(2,1), bty="n", lwd=c(2,3))
par(opar)
## Figure 5.2
par(mar=c(3,3,1,0),oma=c(1,1,1,1))
plot(b5,w,type="l",lwd=3, xaxt="n", yaxt="n", xlab="", ylab="")
abline(h=logLik(dp.out)-qchisq(.95,1)/2,lty=3, col="gray40")
segments(dp.cis[6,3], -45, dp.cis[6,4], -45, lty=6, col="black", lwd=2)
segments(dp.cis[6,3:4], c(-45,-45), dp.cis[6,3:4], c(-55,-55), lty=3, col="gray40")
text(3.5, y=-45, "Wald Test", cex=0.9)
segments(low.pll, -42.5, high.pll, -42.5, lty=2, lwd=2, col="black")
segments(c(low.pll, high.pll), c(-55,-55), c(low.pll, high.pll),
         rep(logLik(dp.out)-qchisq(.95,1)/2,2), lty=3, col="gray40")
text(3.25, y=-42.5, "Profile log-likelihood", cex=0.9, pos=4)
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Coefficient of SOUTH', ylab="Profile log-likelihood",
      line = 1.7, cex.lab=1.2)
par(opar)
## Figure 6.1
coef.vector <- NULL
for (i in 1:length(EXECUTIONS))  {
  dp.temp <- glm(EXECUTIONS[-i] ~ INCOME[-i]+PERPOVERTY[-i]+PERBLACK[-i]+log(VC100k96)[-i]+
                   SOUTH[-i]+PROPDEGREE[-i], family=poisson)
  coef.vector <- rbind(coef.vector,dp.temp$coefficients)
}
layout(matrix(c(1,2,3,4,5,6), ncol=2, byrow = TRUE), heights = c(0.33,0.33,0.33))
par(mar=c(3,4.5,2,4),oma=c(2,1,1,3))
for(i in 2:ncol(coef.vector))  {
  x=plot(coef.vector[,i],type="b",xlab="",ylab="",  yaxt="n", xaxt="n", lwd=2,
         ylim=c(min(coef.vector[,i])-abs(min(coef.vector[,i]))*0.25,
         max(coef.vector[,i])+abs(max(coef.vector[,i]))*0.25))
  abline(h=dp.out$coefficients[i])
  axis(1, at =seq(2,16,2), tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
  if(i==2){
    axis(2, at = seq(5,35,5)/100000, labels = as.expression(paste(seq(5,35,5), "e(-5)", sep="")),
         tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
  }
  else{
    axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
  }
  title(xlab = "Index number",
        line = 1.7, cex.lab=1.2)
  title(ylab=dimnames(dp.mat.0)[[2]][i],
        line = 3.25, cex.lab=1.2)
}
par(opar)
Compute confidence intervals for predictions.
Description
Apply an exponential transformation to the confidence intervals and predictions from binomial and Poisson models.
Usage
glm.cis(preds, ses, alpha, df)
Arguments
| preds | The predictions based on the additive linear component of the model. | 
| ses | The standard error(s) of the prediction. | 
| alpha | The desired confidence level. | 
| df | The desired degrees of freedom. | 
Value
The output is a matrix.
Examples
data(campaign)
attach(campaign)
cmpgn.out <- glm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT,
                 family=Gamma(link = 'log'), data=campaign)
newdat_gender <- data.frame(CANDGENDER = c('F','M'), PARTY= rep('Democrat',2),
                            INCUMCHALL=rep("C", 2), HISPPCT=rep(mean(campaign$HISPPCT),2))
preds_gender <- predict(cmpgn.out, newdata = newdat_gender, se.fit = TRUE)
glm.cis(preds_gender$fit, preds_gender$se.fit, 0.95,cmpgn.out$df.residual)
Summarize regression output from generalized linear models.
Description
An alternative to the summary() function.
Usage
glm.summary(in.object, alpha = 0.05)
Arguments
| in.object | The regression output from glm(). | 
| alpha | A parameter defaulted to 0.05. | 
Value
The output is a matrix.
Examples
data(campaign)
attach(campaign)
cmpgn.out <- glm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT,
              family=Gamma(link = 'log'), data=campaign)
glm.summary(cmpgn.out)
Summarize regression output from multinomial generalized linear models.
Description
An alternative to the summary() function.
Usage
glm.summary.multinom(in.object, alpha = 0.05)
Arguments
| in.object | The regression output from multinom(). | 
| alpha | A parameter defaulted to 0.05. | 
Value
The output is a list.
Examples
data(primary)
attach(primary)
library(nnet)
primary.out <- multinom(PRIMVOTE ~ AGE + GENDER + EDUCATION + REGION +
                         RELIGIOSITY + IDEOLOGY + RWA + TRUMPWIN, data=primary)
glm.summary.multinom(primary.out)
Compute variance-covariance matrix.
Description
Calculate the (unscaled) variance-covariance matrix from a generalized linear model regression output. Used in 'GLMpack' within the function 'glm.summary()'.
Usage
glm.vc(obj)
Arguments
| obj | The regression output from glm(). | 
Value
The output is a matrix.
Examples
data(campaign)
attach(campaign)
cmpgn.out <- glm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT,
              family=Gamma(link = 'log'), data=campaign)
glm.vc(cmpgn.out)
Data on Mexico from the Mexican Family Life Survey
Description
Data for the Mexico example used in chapter 7
Usage
data(mexico)
Format
A data frame with 644 rows and 11 variables:
- SUBJECT
- Subject ID 
- HOUSEHOLD
- Household ID 
- STATE
- State indicator, here 5 for the state of Coahuila 
- MIGR
- Dummy variable indicating whether respondents have thought of migrating 
- NCRIME
- Number of crimes of which the female respondent has been the victim 
- SEV
- Mean seriousness of the crime 
- PAST
- Evaluations of past life conditions 
- FUT
- Evaluations of future community conditions 
- INC
- Respondents' income 
- AGE
- Respondents' age 
- WAVE
- Indicator for the wave under analysis 
...
Examples
data(mexico)
attach(mexico)
## Table 7.3
library(lme4)
cs <- function(x) scale(x,scale=TRUE,center=TRUE) # Function to re-scale
mexico.out <- glmer(MIGR ~ cs(NCRIME) + cs(SEV) + cs(PAST) +
                cs(FUT) +cs(INC) + cs(AGE) + WAVE
              + (1|SUBJECT), data=mexico, family=binomial("logit"),
              glmerControl(optimizer="bobyqa", tolPwrss=5e-2, optCtrl = list(maxfun=10000)))
summary(mexico.out)
se <- sqrt(diag(vcov(mexico.out)))
(cis.mexico.out <- cbind(Est = fixef(mexico.out),
                        LL = fixef(mexico.out) - 1.96 * se,
                        UL = fixef(mexico.out) + 1.96 * se))
Data on the characteristics of peace agreement outcomes
Description
Data for the Peace example used in chapter 7
Usage
data(peace)
Format
A data frame with 216 rows and 9 variables:
- OUTISS
- Ordinal variable indicating the scale of outstanding issues that were not resolved during the peace negotiations with 30 percent zero values 
- PP
- Binary variable indicating whether a rebel force is allowed to transform into a legal political party 
- INTCIV
- Binary variable indicating whether members of the rebel group are to be integrated into the civil service 
- AMN
- Binary variable indicating whether there is an amnesty provision in the agreement 
- PRIS
- Binary variable indicating whether prisoners are released 
- FED
- Binary variable indicating whether a federal state solution is included 
- COMIMP
- Binary variable indicating whether the agreement establishes a commission or committee to oversee implementation 
- REAFFIRM
- Binary variable indicating whether the agreement reaffirms earlier peace agreements 
- PKO
- Binary variable indicating whether or not the peace agreement included the deployment of peacekeeping forces 
...
Examples
data(peace)
attach(peace)
require(pscl)
## Table 7.6
M3 <- zeroinfl(OUTISS ~ PP + INTCIV + AMN + PRIS + FED + COMIMP + REAFFIRM | PKO, data=peace)
summary(M3)
out.table.count <- cbind(summary(M3)$coef$count[,1:2],
    summary(M3)$coef$count[,1] - 1.96*summary(M3)$coef$count[,2],
    summary(M3)$coef$count[,1] + 1.96*summary(M3)$coef$count[,2])
out.table.zero <- cbind(summary(M3)$coef$zero[,1:2],
    summary(M3)$coef$zero[,1] - 1.96*summary(M3)$coef$zero[,2],
    summary(M3)$coef$zero[,1] + 1.96*summary(M3)$coef$zero[,2])
out.table.count
out.table.zero
Data on the 2016 Republican Presidentical Primaries
Description
Data for the primary example used in chapters 4 and 5
Usage
data(primary)
Format
A data frame with 706 rows and 9 variables:
- PRIMVOTE
- Vote intention 
- AGE
- Age 
- GENDER
- Gender 
- EDUCATION
- Education 
- REGION
- Region of the country in which the respondent lives 
- RELIGIOSITY
- Religiosity 
- IDEOLOGY
- Ideology 
- RWA
- Right Wing Authoritarianism scale 
- TRUMPWIN
- Perceptions of whether Trump could win 
...
Examples
opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
data(primary)
attach(primary)
library(nnet)
library(pBrackets)
library(effects)
## Model
primary.out <- multinom(PRIMVOTE ~ AGE + GENDER + EDUCATION + REGION +
                        RELIGIOSITY + IDEOLOGY + RWA + TRUMPWIN, data=primary)
summ.primary.out <- glm.summary.multinom(primary.out)
## Figure 4.2
par(mfrow=c(3,3), mar=c(2.5,2,2,1))
# Plot 1: Electoral preference
countsPV0 <- barplot(table(primary$PRIMVOTE), main="Electoral Preference",
        xlab="Candidates", mgp=c(1.1, 0.2, 1))
text(countsPV0[,1], rep(10,4), as.numeric(table(primary$PRIMVOTE)), cex=1.5)
# Plot 2: Age
countsAGE <- barplot(table(primary$AGE), main="Age",
                     xlab="Age categories", mgp=c(1.1, 0.2, 0))
text(countsAGE[,1], rep(10,4), as.numeric(table(primary$AGE)), cex=1.5)
# Plot 3: Gender
countsGENDER <- barplot(table(primary$GENDER), main="Gender",
                     xlab="Gender categories", mgp=c(1.1, 0.2, 0), ylim=c(0,500))
text(countsGENDER[,1], rep(25,2), as.numeric(table(primary$GENDER)), cex=1.5)
# Plot 4: Education
countsEDUC <- barplot(table(primary$EDUCATION), main="Education",
                        xlab="Schooling level", mgp=c(1.1, 0.2, 0))
text(countsEDUC[,1], rep(10,4), as.numeric(table(primary$EDUCATION)), cex=1.5)
# Plot 5: Region
countsREG <- barplot(table(primary$REGION), main="Region",
                      xlab="Region", mgp=c(1.1, 0.2, 0))
text(countsREG[,1], rep(10,4), as.numeric(table(primary$REGION)), cex=1.5)
hist(primary$RELIGIOSITY,xlab="Religiosity Score",ylab="",
     xlim=c(-1.5,2), ylim=c(0, 225), main="Religiosity",
     col = "gray70", mgp=c(1.1, 0.2, 0))
# Plot 7: Ideology
hist(primary$IDEOLOGY,xlab="Ideology Score",ylab="",
     xlim=c(-2,1.5), ylim=c(0, 150), main="Ideology",
     col = "gray70", mgp=c(1.1, 0.2, 0))
# Plot 8: Right Wing Authoritarianism
hist(primary$RWA,xlab="Right Wing Authoritarianism Score",ylab="",
     xlim=c(-2.5,2), ylim=c(0, 200), main="Authoritarianism",
     col = "gray70", mgp=c(1.1, 0.2, 0))
colnames(primary)
# Plot 9: Could Trump Win?
countsWIN <- barplot(table(primary$TRUMPWIN), main="Trump's winnability",
                     xlab="Perceptions of whether Trump could win",
                     mgp=c(1.1, 0.2, 0), ylim=c(0,550))
text(countsWIN[,1], rep(30,3), as.numeric(table(primary$TRUMPWIN)), cex=1.5)
par(opar)
## Figure 5.3
layout(matrix(1:2, ncol = 1), heights = c(0.9,0.1))
par(mar=c(3,4,0,1),oma=c(1,1,1,1))
plot(summ.primary.out[[1]][,1], type = "n", xaxt="n", yaxt="n",
     xlim=c(-10,3), ylim=c(0,60), ylab="", xlab="")
abline(v=-5, h=c(12,16,28,40,44,48,52), lwd=2)
abline(h=c(4,8,20,24,32,36,56), lty=3, col="gray60")
text(rep(-7.5,15), seq(2,58,4),
     labels = c('30-44', '45-59', '60+',
                'Male',
                'High School','Some College','Bachelor\'s degree or higher',
                'Northeast', 'South', 'West',
                'Religiosity',
                'Ideology',
                'Authoritarianism',
                'Yes', 'Don\'t know'))
segments(summ.primary.out[[1]][-1,3], seq(1,57,4), summ.primary.out[[1]][-1,4],
         seq(1,57,4), col="gray30", lwd=2)
points(summ.primary.out[[1]][-1,1], seq(1,57,4), pch=21, cex=1.4, bg="black")
text(summ.primary.out[[1]][-1,1], seq(1,57,4), labels = "C", cex = 0.7, col="white")
segments(summ.primary.out[[2]][-1,3], seq(3,59,4), summ.primary.out[[2]][-1,4],
         seq(3,59,4), col="gray30", lwd=2)
points(summ.primary.out[[2]][-1,1], seq(3,59,4), pch=21, cex=1.4, bg="white")
text(summ.primary.out[[2]][-1,1], seq(3,59,4), labels = "K", cex = 0.7, col="black")
abline(v=0, lty=2)
axis(1, tck=0.01, at = seq(-5,5,0.5),cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0,
     lwd.ticks = 1)
axis(2, tck=0.02, at = c(6,14,22,34,42,46,50,56), labels=c('AGE', 'GENDER',
     'EDUCATION', 'REGION', 'RELIGIOSITY', 'IDEOLOGY', 'RWA', 'TRUMPWIN'),
     cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 0, las=1)
title(xlab = "Coefficient",
      line = 1.7, cex.lab=1.3)
par(mar=c(0,4,0,0))
plot(0,0, type="n", axes = FALSE, xaxt="n", yaxt="n", xlab="", ylab = "")
legend("center", c("Cruz", "Kasich"), ncol=2, pch=c(21,21), pt.bg=c("black", "white"),
       pt.cex=rep(1.4,2), bty = "n")
par(opar)
## Figure 5.4
mygray = rgb(153, 153, 153, alpha = 200, maxColorValue = 255)
mygray2 = rgb(179, 179, 179, alpha = 150, maxColorValue = 255)
mygray3 = rgb(204, 204, 204, alpha = 150, maxColorValue = 255)
preds_win <- Effect("TRUMPWIN", primary.out)
preds_ideol <- Effect("IDEOLOGY", primary.out, xlevels=list(IDEOLOGY=100))
par(mfrow=c(1,2), mar=c(4,3,3,0),oma=c(1,1,1,1))
plot(1:3, preds_win$prob[,1], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(0,4), ylim=c(0, 0.7))
segments(rep(1:3, 3), preds_win$lower.prob[,1:3], rep(1:3, 3), preds_win$upper.prob[,1:3],
         col=rep(c('black', 'black', 'gray60'), each=3), lty = rep(c(1,2,1), each=3))
points(rep(1:3,3), preds_win$prob[,1:3], pch=21, cex = 2,
       bg=rep(c("black", "white", "gray60"),each=3), col=rep(c("black", "black", "gray60"),each=3))
text(rep(1:3,3), preds_win$prob[,1:3], labels=rep(c("T", "C", "K"), each=3), cex = 0.8,
     bg=rep(c("black", "white", "gray60"),each=3), col=rep(c("white", "black", "black"),each=3))
axis(1, at=1:3, labels = c("No", "Yes", "DK"), tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0),
     lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Perceptions of whether Trump could win the election',
      ylab="Probability of voting",
      line = 1.7, cex.lab=1)
title(line = 1, main="Winnability", font.main=3)
plot(preds_ideol$x$IDEOLOGY, preds_ideol$prob[,1], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(-2,1.5), ylim=c(0, 0.7))
polygon(c(preds_ideol$x$IDEOLOGY, rev(preds_ideol$x$IDEOLOGY)),
        c(preds_ideol$lower.prob[,2], rev(preds_ideol$upper.prob[,2])), border = NA, col=mygray2)
polygon(c(preds_ideol$x$IDEOLOGY, rev(preds_ideol$x$IDEOLOGY)),
        c(preds_ideol$lower.prob[,1], rev(preds_ideol$upper.prob[,1])), border = NA, col=mygray)
polygon(c(preds_ideol$x$IDEOLOGY, rev(preds_ideol$x$IDEOLOGY)),
        c(preds_ideol$lower.prob[,3], rev(preds_ideol$upper.prob[,3])), border = NA, col=mygray3)
lines(preds_ideol$x$IDEOLOGY, preds_ideol$prob[,1], col="gray20", lwd=2)
lines(preds_ideol$x$IDEOLOGY, preds_ideol$prob[,2], col="gray40", lwd=2, lty=2)
lines(preds_ideol$x$IDEOLOGY, preds_ideol$prob[,3], col="black", lwd=2)
text(rep(1,3), c(min(preds_ideol$prob[,1]), min(preds_ideol$prob[,2]),
     max(preds_ideol$prob[,3]))+.05, labels = c('Trump', 'Cruz', 'Kasich'))
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Ideology scores',
      ylab="Probability of voting",
      line = 1.7, cex.lab=1)
title(line = 1, main="Ideology", font.main=3)
par(opar)
Data on the Scottish national parliament vote
Description
Data for the Scotland example used in chapters 4, 5, and 6
Usage
data(scotvote)
Format
A data frame with 32 rows and 7 variables:
- PerYesTax
- Percentage of population who granting Scottish parliament taxation powers 
- CouncilTax
- Council tax collected 
- PerClaimantFemale
- Female percentage of total claims for unemployment benefits as of January 1998 
- StdMortalityRatio
- Standardized mortality rate 
- Active
- Percentage of economically active individuals relative to the population of working age 
- GDP
- GDP per council 
- Percentage5to15
- Percentage of children aged 5 to 15 
...
Examples
data(scotvote)
attach(scotvote)
## Table 4.3
scotvote
## Table 5.3
scottish.vote.glm <- glm((PerYesTax) ~ CouncilTax*PerClaimantFemale+
                     StdMortalityRatio+Active+GDP+Percentage5to15,
                     family=Gamma,data=scotvote)
vote.sum <- summary(scottish.vote.glm)
round(cbind(
  vote.sum$coefficients[,1], vote.sum$coefficients[,2],
  confint(scottish.vote.glm)),4)
vote.sum
## Table 6.3
anova(scottish.vote.glm,test="F")
Data on California state data on educational policy and outcomes
Description
Data for the STAR program example used in chapter 6
Usage
data(star)
Format
A data frame with 303 rows and 16 variables:
- LOWINC
- Proportion of low-income students 
- PERASIAN
- Proportions of Asian students 
- PERBLACK
- Proportions of African-American students 
- PERHISP
- Proportions of Hispanic students 
- PERMINTE
- Percentage of minority teachers 
- AVYRSEXP
- Mean teacher experience in years 
- AVSAL
- Median teacher salary, including benefits, in thousands of dollars 
- PERSPEN
- Per-pupil expenditures in thousands of dollars 
- PTRATIO
- Pupil/teacher ratio in the classroom 
- PCTAF
- Percentage of students taking college credit courses 
- PCTCHRT
- Percentage of schools in the district that are charter schools 
- PCTYRRND
- Percent of schools in the district operating year-round programs 
- READTOT
- Total number of students taking the reading exam in the 9th grade 
- PR50RD
- Proportion of students scoring over the reading median in the 9th grade 
- MATHTOT
- Total number of students taking the math exam in the 9th grade 
- PR50M
- Proportion of students scoring over the math median in the 9th grade 
...
Examples
opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
data(star)
attach(star)
## MATH MODEL
star.logit.fit <- glm(cbind(PR50M,MATHTOT-PR50M) ~ LOWINC + PERASIAN + PERBLACK + PERHISP +
                  PERMINTE * AVYRSEXP * AVSAL + PERSPEN * PTRATIO * PCTAF +
                  PCTCHRT + PCTYRRND, family=binomial(link=logit),data=star)
## READING MODEL
star.logit.fit2 <- glm(cbind(PR50RD,READTOT-PR50RD) ~ LOWINC + PERASIAN + PERBLACK + PERHISP +
                   PERMINTE * AVYRSEXP * AVSAL + PERSPEN * PTRATIO * PCTAF +
                   PCTCHRT + PCTYRRND, family=binomial(link=logit),data=star)
## Table 6.4
star.summ.mat <- round(summary(star.logit.fit)$coef, 4)
data.frame(cbind(star.summ.mat[,1], star.summ.mat[,2], "[", round(confint(star.logit.fit)[,1],4),
      " ~", round(confint(star.logit.fit)[,2],4), "]"))
## Table 6.5
mean.vector <- apply(star,2,mean)
diff.vector <- c(1,mean.vector[1:12],mean.vector[5]*mean.vector[6],mean.vector[5]*mean.vector[7],
                 mean.vector[6]*mean.vector[7],mean.vector[8]*mean.vector[9],
                 mean.vector[8]*mean.vector[10],mean.vector[9]*mean.vector[10],
                 mean.vector[5]*mean.vector[6]*mean.vector[7],
                 mean.vector[8]*mean.vector[9]*mean.vector[10])
names(diff.vector) <- names(summary(star.logit.fit2)$coef[,1])
# PERMINTE FIRST DIFFERENCE ACROSS IQR
logit <- function(vec){return(exp(vec)/(1+exp(vec)))}
logit(c(diff.vector[1:5],6.329,diff.vector[7:13],6.329*mean.vector[6],6.329*mean.vector[7],
        diff.vector[16:19],6.329*mean.vector[6]*mean.vector[7],diff.vector[21])
      %*%summary.glm(star.logit.fit)$coef[,1]) -
logit(c(diff.vector[1:5],19.180,diff.vector[7:13],19.180*mean.vector[6],19.180*mean.vector[7],
          diff.vector[16:19],19.180*mean.vector[6]*mean.vector[7],diff.vector[21])
        %*%summary.glm(star.logit.fit)$coef[,1])
# First quartile information
q1.diff.mat <- q2.diff.mat <- q3.diff.mat <- q4.diff.mat <-
  matrix(rep(diff.vector,length(diff.vector)),
                      nrow=length(diff.vector), ncol=length(diff.vector),
                      dimnames=list(names(diff.vector),names(diff.vector)))
diag(q1.diff.mat)[2:13] <- apply(star,2,summary)[2,1:12]
q1.diff.mat[14,6] <- q1.diff.mat[6,6]*q1.diff.mat[7,6]
q1.diff.mat[15,6] <- q1.diff.mat[6,6]*q1.diff.mat[8,6]
q1.diff.mat[20,6] <- q1.diff.mat[6,6]*q1.diff.mat[7,6]*q1.diff.mat[8,6]
q1.diff.mat[14,7] <- q1.diff.mat[7,7]*q1.diff.mat[6,7]
q1.diff.mat[16,7] <- q1.diff.mat[7,7]*q1.diff.mat[8,7]
q1.diff.mat[20,7] <- q1.diff.mat[6,7]*q1.diff.mat[7,7]*q1.diff.mat[8,7]
q1.diff.mat[15,8] <- q1.diff.mat[8,8]*q1.diff.mat[6,8]
q1.diff.mat[16,8] <- q1.diff.mat[8,8]*q1.diff.mat[7,8]
q1.diff.mat[20,8] <- q1.diff.mat[6,8]*q1.diff.mat[7,8]*q1.diff.mat[8,8]
q1.diff.mat[17,9] <- q1.diff.mat[9,9]*q1.diff.mat[10,9]
q1.diff.mat[18,9] <- q1.diff.mat[9,9]*q1.diff.mat[11,9]
q1.diff.mat[21,9] <- q1.diff.mat[9,9]*q1.diff.mat[10,9]*q1.diff.mat[11,9]
q1.diff.mat[17,10] <- q1.diff.mat[10,10]*q1.diff.mat[9,10]
q1.diff.mat[19,10] <- q1.diff.mat[10,10]*q1.diff.mat[11,10]
q1.diff.mat[21,10] <- q1.diff.mat[9,10]*q1.diff.mat[10,10]*q1.diff.mat[11,10]
q1.diff.mat[18,11] <- q1.diff.mat[11,11]*q1.diff.mat[9,11]
q1.diff.mat[19,11] <- q1.diff.mat[11,11]*q1.diff.mat[10,11]
q1.diff.mat[21,11] <- q1.diff.mat[9,11]*q1.diff.mat[10,11]*q1.diff.mat[11,11]
# Third quartile
diag(q2.diff.mat)[2:13] <- apply(star,2,summary)[5,1:12]
q2.diff.mat[14,6] <- q2.diff.mat[6,6]*q2.diff.mat[7,6]
q2.diff.mat[15,6] <- q2.diff.mat[6,6]*q2.diff.mat[8,6]
q2.diff.mat[20,6] <- q2.diff.mat[6,6]*q2.diff.mat[7,6]*q2.diff.mat[8,6]
q2.diff.mat[14,7] <- q2.diff.mat[7,7]*q2.diff.mat[6,7]
q2.diff.mat[16,7] <- q2.diff.mat[7,7]*q2.diff.mat[8,7]
q2.diff.mat[20,7] <- q2.diff.mat[6,7]*q2.diff.mat[7,7]*q2.diff.mat[8,7]
q2.diff.mat[15,8] <- q2.diff.mat[8,8]*q2.diff.mat[6,8]
q2.diff.mat[16,8] <- q2.diff.mat[8,8]*q2.diff.mat[7,8]
q2.diff.mat[20,8] <- q2.diff.mat[6,8]*q2.diff.mat[7,8]*q2.diff.mat[8,8]
q2.diff.mat[17,9] <- q2.diff.mat[9,9]*q2.diff.mat[10,9]
q2.diff.mat[18,9] <- q2.diff.mat[9,9]*q2.diff.mat[11,9]
q2.diff.mat[21,9] <- q2.diff.mat[9,9]*q2.diff.mat[10,9]*q2.diff.mat[11,9]
q2.diff.mat[17,10] <- q2.diff.mat[10,10]*q2.diff.mat[9,10]
q2.diff.mat[19,10] <- q2.diff.mat[10,10]*q2.diff.mat[11,10]
q2.diff.mat[21,10] <- q2.diff.mat[9,10]*q2.diff.mat[10,10]*q2.diff.mat[11,10]
q2.diff.mat[18,11] <- q2.diff.mat[11,11]*q2.diff.mat[9,11]
q2.diff.mat[19,11] <- q2.diff.mat[11,11]*q2.diff.mat[10,11]
q2.diff.mat[21,11] <- q2.diff.mat[9,11]*q2.diff.mat[10,11]*q2.diff.mat[11,11]
# Minimum
diag(q3.diff.mat)[2:13] <- apply(star,2,summary)[1,1:12]
q3.diff.mat[14,6] <- q3.diff.mat[6,6]*q3.diff.mat[7,6]
q3.diff.mat[15,6] <- q3.diff.mat[6,6]*q3.diff.mat[8,6]
q3.diff.mat[20,6] <- q3.diff.mat[6,6]*q3.diff.mat[7,6]*q3.diff.mat[8,6]
q3.diff.mat[14,7] <- q3.diff.mat[7,7]*q3.diff.mat[6,7]
q3.diff.mat[16,7] <- q3.diff.mat[7,7]*q3.diff.mat[8,7]
q3.diff.mat[20,7] <- q3.diff.mat[6,7]*q3.diff.mat[7,7]*q3.diff.mat[8,7]
q3.diff.mat[15,8] <- q3.diff.mat[8,8]*q3.diff.mat[6,8]
q3.diff.mat[16,8] <- q3.diff.mat[8,8]*q3.diff.mat[7,8]
q3.diff.mat[20,8] <- q3.diff.mat[6,8]*q3.diff.mat[7,8]*q3.diff.mat[8,8]
q3.diff.mat[17,9] <- q3.diff.mat[9,9]*q3.diff.mat[10,9]
q3.diff.mat[18,9] <- q3.diff.mat[9,9]*q3.diff.mat[11,9]
q3.diff.mat[21,9] <- q3.diff.mat[9,9]*q3.diff.mat[10,9]*q3.diff.mat[11,9]
q3.diff.mat[17,10] <- q3.diff.mat[10,10]*q3.diff.mat[9,10]
q3.diff.mat[19,10] <- q3.diff.mat[10,10]*q3.diff.mat[11,10]
q3.diff.mat[21,10] <- q3.diff.mat[9,10]*q3.diff.mat[10,10]*q3.diff.mat[11,10]
q3.diff.mat[18,11] <- q3.diff.mat[11,11]*q3.diff.mat[9,11]
q3.diff.mat[19,11] <- q3.diff.mat[11,11]*q3.diff.mat[10,11]
q3.diff.mat[21,11] <- q3.diff.mat[9,11]*q3.diff.mat[10,11]*q3.diff.mat[11,11]
diag(q4.diff.mat)[2:13] <- apply(star,2,summary)[6,1:12]
q4.diff.mat[14,6] <- q4.diff.mat[6,6]*q4.diff.mat[7,6]
q4.diff.mat[15,6] <- q4.diff.mat[6,6]*q4.diff.mat[8,6]
q4.diff.mat[20,6] <- q4.diff.mat[6,6]*q4.diff.mat[7,6]*q2.diff.mat[8,6]
q4.diff.mat[14,7] <- q4.diff.mat[7,7]*q4.diff.mat[6,7]
q4.diff.mat[16,7] <- q4.diff.mat[7,7]*q4.diff.mat[8,7]
q4.diff.mat[20,7] <- q4.diff.mat[6,7]*q4.diff.mat[7,7]*q4.diff.mat[8,7]
q4.diff.mat[15,8] <- q4.diff.mat[8,8]*q4.diff.mat[6,8]
q4.diff.mat[16,8] <- q4.diff.mat[8,8]*q4.diff.mat[7,8]
q4.diff.mat[20,8] <- q4.diff.mat[6,8]*q4.diff.mat[7,8]*q4.diff.mat[8,8]
q4.diff.mat[17,9] <- q4.diff.mat[9,9]*q4.diff.mat[10,9]
q4.diff.mat[18,9] <- q4.diff.mat[9,9]*q4.diff.mat[11,9]
q4.diff.mat[21,9] <- q4.diff.mat[9,9]*q4.diff.mat[10,9]*q4.diff.mat[11,9]
q4.diff.mat[17,10] <- q4.diff.mat[10,10]*q4.diff.mat[9,10]
q4.diff.mat[19,10] <- q4.diff.mat[10,10]*q4.diff.mat[11,10]
q4.diff.mat[21,10] <- q4.diff.mat[9,10]*q4.diff.mat[10,10]*q4.diff.mat[11,10]
q4.diff.mat[18,11] <- q4.diff.mat[11,11]*q4.diff.mat[9,11]
q4.diff.mat[19,11] <- q4.diff.mat[11,11]*q4.diff.mat[10,11]
q4.diff.mat[21,11] <- q4.diff.mat[9,11]*q4.diff.mat[10,11]*q4.diff.mat[11,11]
first_diffs <- NULL
for (i in 2:13){
        temp1 <- logit(q2.diff.mat[,i]%*%summary.glm(star.logit.fit)$coef[,1]) -
                        logit(q1.diff.mat[,i]%*%summary.glm(star.logit.fit)$coef[,1])
        temp2 <- logit(q4.diff.mat[,i]%*%summary.glm(star.logit.fit)$coef[,1]) -
          logit(q3.diff.mat[,i]%*%summary.glm(star.logit.fit)$coef[,1])
        first_diffs <- rbind(first_diffs, c(temp1,temp2))
}
first_diffs <- round(first_diffs,4)
diffs_mat <- cbind(diag(q1.diff.mat)[2:13], diag(q2.diff.mat)[2:13],
                   first_diffs[,1],
                   diag(q3.diff.mat)[2:13], diag(q4.diff.mat)[2:13],
                   first_diffs[,2])
colnames(diffs_mat) <- c("1st quartile", "3rd quartile", "Interquartile 1st diff",
                         "Min", "Max", "Full range 1st diff")
diffs_mat
star.mu <- predict.glm(star.logit.fit,type="response")
star.y <- PR50M/MATHTOT
star.n <- length(star.y)
PR50M.adj <- PR50M
for (i in 1:length(PR50M.adj))  {
  if (PR50M.adj[i] > mean(PR50M)) PR50M.adj[i] <- PR50M.adj[i] - 0.5
  if (PR50M.adj[i] < mean(PR50M)) PR50M.adj[i] <- PR50M.adj[i] + 0.5
}
par(mfrow=c(1,3), mar=c(6,3,6,2),oma=c(4,1,4,1))
plot(star.mu,star.y,xlab="",ylab="", yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Fitted values", ylab="Observed values",
      line = 1.7, cex.lab=1.3)
title(main="Model Fit Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(lm(star.y~star.mu)$coefficients, lwd=2)
plot(fitted(star.logit.fit),resid(star.logit.fit,type="pearson"),xlab="",ylab="",
     yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Fitted values", ylab="Pearson Residuals",
      line = 1.7, cex.lab=1.3)
title(main="Residual Dependence Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(0,0, lwd=2)
qqnorm(resid(star.logit.fit,type="deviance"),main="",xlab="",ylab="",
       yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Quantiles of N(0,1)", ylab="Deviance Residual Quantiles",
      line = 1.7, cex.lab=1.3)
title(main="Normal-Quantile Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(-0.3,3.5, lwd=2)
par(opar)
Data on suicides in 2009 in OECD member states
Description
Data for the suicide example used in chapter 7
Usage
data(suicide)
Format
A data frame with 32 rows and 7 variables:
- COUNTRYCODE
- Country code 
- COUNTRYNAME
- Name of the country 
- YEAR
- Year 
- DEATHS
- Number of suicides in the country per 100,000 individuals 
- GDP
- GDP in thousands of dollars 
- SUBABUSE
- Share of the population with alcohol or drug use disorder 
- TEMP
- Average temperature 
...
Examples
opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
data(suicide)
attach(suicide)
## Table 7.2
# Poisson model
suic.out.p <- glm(DEATHS ~ GDP + TEMP + SUBABUSE, family = poisson)
summary(suic.out.p)
round(confint(suic.out.p),3)
coefs_poisson <- summary(suic.out.p)$coefficients[1:4,]
coefs_poisson
suic.out.qp <- glm(DEATHS ~ GDP + TEMP + SUBABUSE, family = quasipoisson)
summary(suic.out.qp)
round(confint(suic.out.qp),3)
coefs_quasipoisson <- summary(suic.out.qp)$coefficients[1:4,]
coefs_quasipoisson
## Figure 7.1
layout(matrix(c(1,2,3,4), ncol=2, byrow = TRUE))
par(mar=c(4,3,2,0),oma=c(1,1,1,1))
# Histogram #1
hist(TEMP,xlab="",ylab="",  yaxt="n", xaxt="n", main="", col="gray50", border = "gray30",
    ylim=c(0,15))
axis(1, tck=0, mgp=c(0, 0, 0), lty=1, lwd=0, lwd.ticks = 0)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = 'Mean temperature (in Celsius)', ylab="",
      line = 1.7, cex.lab=1.2)
title(line = 1, main="Temperature", font.main=3)
# Histogram #2
hist(GDP,xlab="",ylab="",  yaxt="n", xaxt="n", main="", col="gray50", border = "gray30",
     ylim=c(0,15))
axis(1, tck=0, mgp=c(0, 0, 0), lty=1, lwd=0, lwd.ticks = 0)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = 'GDP per capita (in thousands of dollars)', ylab="",
      line = 1.7, cex.lab=1.2)
title(line = 1, main="Economic Conditions", font.main=3)
# Histogram #3
hist(SUBABUSE,xlab="",ylab="",  yaxt="n", xaxt="n", main="", col="gray50", border = "gray30",
     ylim=c(0,15))
axis(1, tck=0, mgp=c(0, 0, 0), lty=1, lwd=0, lwd.ticks = 0)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = '% of population with alcohol or drug use disorders', ylab="",line = 1.7, cex.lab=1.2)
title(line = 1, main="Substance abuse", font.main=3)
# Histogram #4
hist(DEATHS,xlab="",ylab="",  yaxt="n", xaxt="n", main="", col="gray10", border = "gray20",
     ylim=c(0,15))
axis(1, tck=0, mgp=c(0, 0, 0), lty=1, lwd=0, lwd.ticks = 0)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = 'Number of suicides per 100,000 people', ylab="",
      line = 1.7, cex.lab=1.2)
title(line = 1, main="Suicide rate", font.main=3)
par(opar)
## Figure 7.2
newdat1 <- data.frame(GDP=seq(13, 70.5, 1), TEMP=rep(mean(TEMP), 58),
                      SUBABUSE=rep(mean(SUBABUSE), 58))
newdat2 <- data.frame(GDP=rep(mean(GDP), 61), TEMP=rep(mean(TEMP), 61), SUBABUSE=seq(0,6,0.1))
preds.qp.gdp <- predict(suic.out.qp, newdata = newdat1, type = "link", se.fit = TRUE)
preds.qp.subabuse <- predict(suic.out.qp, newdata = newdat2, type = "link", se.fit = TRUE)
ilink.qp <- family(suic.out.qp)$linkinv
cis.p.preds.qp.gdp <- cbind(ilink.qp(preds.qp.gdp$fit - (2 * preds.qp.gdp$se.fit)),
                            ilink.qp(preds.qp.gdp$fit + (2 * preds.qp.gdp$se.fit)))
cis.p.preds.qp.subabuse <- cbind(ilink.qp(preds.qp.subabuse$fit - (2 * preds.qp.subabuse$se.fit)),
                            ilink.qp(preds.qp.subabuse$fit + (2 * preds.qp.subabuse$se.fit)))
mygray = rgb(153, 153, 153, alpha = 200, maxColorValue = 255)
par(mar=c(4,3,1,0),oma=c(1,1,1,1), mfrow=c(1,2))
plot(newdat1$GDP, ilink.qp(preds.qp.gdp$fit), type="n",xlab="",ylab="",  yaxt="n", xaxt="n", lwd=2,
     ylim = c(0,37))
polygon(c(newdat1$GDP,rev(newdat1$GDP)), c(cis.p.preds.qp.gdp[,1],rev(cis.p.preds.qp.gdp[,2])),
        border = NA, col = mygray)
lines(newdat1$GDP, ilink.qp(preds.qp.gdp$fit))
points(GDP, DEATHS, pch="+", col="gray20", cex=0.8)
axis(1, tck=0.03, cex.axis=0.9, at=seq(20,70,10), labels = seq(20,70,10), mgp=c(0.3, 0.3, 0),
     lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'GDP per capita (in thousands of dollars)',
      ylab="Number of suicides per 100,000 people", line = 1.7, cex.lab=1.2)
plot(newdat2$SUBABUSE, ilink.qp(preds.qp.subabuse$fit), type="n",xlab="",ylab="",
     yaxt="n", xaxt="n", lwd=2, ylim = c(0,37))
polygon(c(newdat2$SUBABUSE,rev(newdat2$SUBABUSE)), c(cis.p.preds.qp.subabuse[,1],
        rev(cis.p.preds.qp.subabuse[,2])), border = NA, col = mygray)
lines(newdat2$SUBABUSE, ilink.qp(preds.qp.subabuse$fit))
points(SUBABUSE, DEATHS, pch="+", col="gray20", cex=0.8)
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = '% of population with alcohol or drug use disorders', ylab="",
      line = 1.7, cex.lab=1.2)
par(opar)