The housing data frame has 72 rows and 5 variables.

housing

Format

Sat

Satisfaction of householders with their present housing circumstances, (High, Medium or Low, ordered factor).

Infl

Perceived degree of influence householders have on the management of the property (High, Medium, Low).

Type

Type of rental accommodation, (Tower, Atrium, Apartment, Terrace).

Cont

Contact residents are afforded with other residents, (Low, High).

Freq

Frequencies: the numbers of residents in each class.

Source

Madsen, M. (1976) Statistical analysis of multiple contingency tables. Two examples. Scand. J. Statist. 3, 97–106.

Cox, D. R. and Snell, E. J. (1984) Applied Statistics, Principles and Examples. Chapman & Hall.

References

Venables, W. N. and Ripley, B. D. (2002) Modern Applied Statistics with S. Fourth edition. Springer.

Examples

options(contrasts = c("contr.treatment", "contr.poly"))

# Surrogate Poisson models
house.glm0 <- glm(Freq ~ Infl*Type*Cont + Sat, family = poisson,
                  data = housing)
## IGNORE_RDIFF_BEGIN
summary(house.glm0, correlation = FALSE)
#> 
#> Call:
#> glm(formula = Freq ~ Infl * Type * Cont + Sat, family = poisson, 
#>     data = housing)
#> 
#> Coefficients:
#>                                     Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)                        3.136e+00  1.196e-01  26.225  < 2e-16 ***
#> InflMedium                         2.733e-01  1.586e-01   1.723 0.084868 .  
#> InflHigh                          -2.054e-01  1.784e-01  -1.152 0.249511    
#> TypeApartment                      3.666e-01  1.555e-01   2.357 0.018403 *  
#> TypeAtrium                        -7.828e-01  2.134e-01  -3.668 0.000244 ***
#> TypeTerrace                       -8.145e-01  2.157e-01  -3.775 0.000160 ***
#> ContHigh                          -5.066e-15  1.690e-01   0.000 1.000000    
#> Sat.L                              1.159e-01  4.038e-02   2.871 0.004094 ** 
#> Sat.Q                              2.629e-01  4.515e-02   5.824 5.76e-09 ***
#> InflMedium:TypeApartment          -1.177e-01  2.086e-01  -0.564 0.572571    
#> InflHigh:TypeApartment             1.753e-01  2.279e-01   0.769 0.441783    
#> InflMedium:TypeAtrium             -4.068e-01  3.035e-01  -1.340 0.180118    
#> InflHigh:TypeAtrium               -1.692e-01  3.294e-01  -0.514 0.607433    
#> InflMedium:TypeTerrace             6.292e-03  2.860e-01   0.022 0.982450    
#> InflHigh:TypeTerrace              -9.305e-02  3.280e-01  -0.284 0.776633    
#> InflMedium:ContHigh               -1.398e-01  2.279e-01  -0.613 0.539715    
#> InflHigh:ContHigh                 -6.091e-01  2.800e-01  -2.176 0.029585 *  
#> TypeApartment:ContHigh             5.029e-01  2.109e-01   2.385 0.017083 *  
#> TypeAtrium:ContHigh                6.774e-01  2.751e-01   2.462 0.013811 *  
#> TypeTerrace:ContHigh               1.099e+00  2.675e-01   4.106 4.02e-05 ***
#> InflMedium:TypeApartment:ContHigh  5.359e-02  2.862e-01   0.187 0.851450    
#> InflHigh:TypeApartment:ContHigh    1.462e-01  3.380e-01   0.432 0.665390    
#> InflMedium:TypeAtrium:ContHigh     1.555e-01  3.907e-01   0.398 0.690597    
#> InflHigh:TypeAtrium:ContHigh       4.782e-01  4.441e-01   1.077 0.281619    
#> InflMedium:TypeTerrace:ContHigh   -4.980e-01  3.671e-01  -1.357 0.174827    
#> InflHigh:TypeTerrace:ContHigh     -4.470e-01  4.545e-01  -0.984 0.325326    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for poisson family taken to be 1)
#> 
#>     Null deviance: 833.66  on 71  degrees of freedom
#> Residual deviance: 217.46  on 46  degrees of freedom
#> AIC: 610.43
#> 
#> Number of Fisher Scoring iterations: 5
#> 
## IGNORE_RDIFF_END

addterm(house.glm0, ~. + Sat:(Infl+Type+Cont), test = "Chisq")
#> Single term additions
#> 
#> Model:
#> Freq ~ Infl * Type * Cont + Sat
#>          Df Deviance    AIC     LRT   Pr(Chi)    
#> <none>        217.46 610.43                      
#> Infl:Sat  4   111.08 512.05 106.371 < 2.2e-16 ***
#> Type:Sat  6   156.79 561.76  60.669 3.292e-11 ***
#> Cont:Sat  2   212.33 609.30   5.126   0.07708 .  
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

house.glm1 <- update(house.glm0, . ~ . + Sat*(Infl+Type+Cont))
summary(house.glm1, correlation = FALSE)
#> 
#> Call:
#> glm(formula = Freq ~ Infl + Type + Cont + Sat + Infl:Type + Infl:Cont + 
#>     Type:Cont + Infl:Sat + Type:Sat + Cont:Sat + Infl:Type:Cont, 
#>     family = poisson, data = housing)
#> 
#> Coefficients:
#>                                    Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)                        3.135074   0.120112  26.101  < 2e-16 ***
#> InflMedium                         0.248327   0.159979   1.552 0.120602    
#> InflHigh                          -0.412645   0.184947  -2.231 0.025671 *  
#> TypeApartment                      0.292524   0.157477   1.858 0.063231 .  
#> TypeAtrium                        -0.792847   0.214413  -3.698 0.000218 ***
#> TypeTerrace                       -1.018074   0.221263  -4.601 4.20e-06 ***
#> ContHigh                          -0.001407   0.169711  -0.008 0.993385    
#> Sat.L                             -0.098106   0.112592  -0.871 0.383570    
#> Sat.Q                              0.285657   0.122283   2.336 0.019489 *  
#> InflMedium:TypeApartment          -0.017882   0.210496  -0.085 0.932302    
#> InflHigh:TypeApartment             0.386869   0.233297   1.658 0.097263 .  
#> InflMedium:TypeAtrium             -0.360311   0.304979  -1.181 0.237432    
#> InflHigh:TypeAtrium               -0.036788   0.334793  -0.110 0.912503    
#> InflMedium:TypeTerrace             0.185154   0.288892   0.641 0.521580    
#> InflHigh:TypeTerrace               0.310749   0.334815   0.928 0.353345    
#> InflMedium:ContHigh               -0.200060   0.228748  -0.875 0.381799    
#> InflHigh:ContHigh                 -0.725790   0.282352  -2.571 0.010155 *  
#> TypeApartment:ContHigh             0.569691   0.212152   2.685 0.007247 ** 
#> TypeAtrium:ContHigh                0.702115   0.276056   2.543 0.010979 *  
#> TypeTerrace:ContHigh               1.215930   0.269968   4.504 6.67e-06 ***
#> InflMedium:Sat.L                   0.519627   0.096830   5.366 8.03e-08 ***
#> InflHigh:Sat.L                     1.140302   0.118180   9.649  < 2e-16 ***
#> InflMedium:Sat.Q                  -0.064474   0.102666  -0.628 0.530004    
#> InflHigh:Sat.Q                     0.115436   0.127798   0.903 0.366380    
#> TypeApartment:Sat.L               -0.520170   0.109793  -4.738 2.16e-06 ***
#> TypeAtrium:Sat.L                  -0.288484   0.149551  -1.929 0.053730 .  
#> TypeTerrace:Sat.L                 -0.998666   0.141527  -7.056 1.71e-12 ***
#> TypeApartment:Sat.Q                0.055418   0.118515   0.468 0.640068    
#> TypeAtrium:Sat.Q                  -0.273820   0.149713  -1.829 0.067405 .  
#> TypeTerrace:Sat.Q                 -0.032328   0.149251  -0.217 0.828520    
#> ContHigh:Sat.L                     0.340703   0.087778   3.881 0.000104 ***
#> ContHigh:Sat.Q                    -0.097929   0.094068  -1.041 0.297851    
#> InflMedium:TypeApartment:ContHigh  0.046900   0.286212   0.164 0.869837    
#> InflHigh:TypeApartment:ContHigh    0.126229   0.338208   0.373 0.708979    
#> InflMedium:TypeAtrium:ContHigh     0.157239   0.390719   0.402 0.687364    
#> InflHigh:TypeAtrium:ContHigh       0.478611   0.444244   1.077 0.281320    
#> InflMedium:TypeTerrace:ContHigh   -0.500162   0.367135  -1.362 0.173091    
#> InflHigh:TypeTerrace:ContHigh     -0.463099   0.454713  -1.018 0.308467    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for poisson family taken to be 1)
#> 
#>     Null deviance: 833.657  on 71  degrees of freedom
#> Residual deviance:  38.662  on 34  degrees of freedom
#> AIC: 455.63
#> 
#> Number of Fisher Scoring iterations: 4
#> 

1 - pchisq(deviance(house.glm1), house.glm1$df.residual)
#> [1] 0.2671363

dropterm(house.glm1, test = "Chisq")
#> Single term deletions
#> 
#> Model:
#> Freq ~ Infl + Type + Cont + Sat + Infl:Type + Infl:Cont + Type:Cont + 
#>     Infl:Sat + Type:Sat + Cont:Sat + Infl:Type:Cont
#>                Df Deviance    AIC     LRT   Pr(Chi)    
#> <none>              38.662 455.63                      
#> Infl:Sat        4  147.780 556.75 109.117 < 2.2e-16 ***
#> Type:Sat        6  100.889 505.86  62.227 1.586e-11 ***
#> Cont:Sat        2   54.722 467.69  16.060 0.0003256 ***
#> Infl:Type:Cont  6   43.952 448.92   5.290 0.5072454    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

addterm(house.glm1, ~. + Sat:(Infl+Type+Cont)^2, test  =  "Chisq")
#> Single term additions
#> 
#> Model:
#> Freq ~ Infl + Type + Cont + Sat + Infl:Type + Infl:Cont + Type:Cont + 
#>     Infl:Sat + Type:Sat + Cont:Sat + Infl:Type:Cont
#>               Df Deviance    AIC     LRT Pr(Chi)  
#> <none>             38.662 455.63                  
#> Infl:Type:Sat 12   16.107 457.08 22.5550 0.03175 *
#> Infl:Cont:Sat  4   37.472 462.44  1.1901 0.87973  
#> Type:Cont:Sat  6   28.256 457.23 10.4064 0.10855  
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

hnames <- lapply(housing[, -5], levels) # omit Freq
newData <- expand.grid(hnames)
newData$Sat <- ordered(newData$Sat)
house.pm <- predict(house.glm1, newData,
                    type = "response")  # poisson means
house.pm <- matrix(house.pm, ncol = 3, byrow = TRUE,
                   dimnames = list(NULL, hnames[[1]]))
house.pr <- house.pm/drop(house.pm %*% rep(1, 3))
cbind(expand.grid(hnames[-1]), round(house.pr, 2))
#>      Infl      Type Cont  Low Medium High
#> 1     Low     Tower  Low 0.40   0.26 0.34
#> 2  Medium     Tower  Low 0.26   0.27 0.47
#> 3    High     Tower  Low 0.15   0.19 0.66
#> 4     Low Apartment  Low 0.54   0.23 0.23
#> 5  Medium Apartment  Low 0.39   0.26 0.34
#> 6    High Apartment  Low 0.26   0.21 0.53
#> 7     Low    Atrium  Low 0.43   0.32 0.25
#> 8  Medium    Atrium  Low 0.30   0.35 0.36
#> 9    High    Atrium  Low 0.19   0.27 0.54
#> 10    Low   Terrace  Low 0.65   0.22 0.14
#> 11 Medium   Terrace  Low 0.51   0.27 0.22
#> 12   High   Terrace  Low 0.37   0.24 0.39
#> 13    Low     Tower High 0.30   0.28 0.42
#> 14 Medium     Tower High 0.18   0.27 0.54
#> 15   High     Tower High 0.10   0.19 0.71
#> 16    Low Apartment High 0.44   0.27 0.30
#> 17 Medium Apartment High 0.30   0.28 0.42
#> 18   High Apartment High 0.18   0.21 0.61
#> 19    Low    Atrium High 0.33   0.36 0.31
#> 20 Medium    Atrium High 0.22   0.36 0.42
#> 21   High    Atrium High 0.13   0.27 0.60
#> 22    Low   Terrace High 0.55   0.27 0.19
#> 23 Medium   Terrace High 0.40   0.31 0.29
#> 24   High   Terrace High 0.27   0.26 0.47

# Iterative proportional scaling
loglm(Freq ~ Infl*Type*Cont + Sat*(Infl+Type+Cont), data = housing)
#> Call:
#> loglm(formula = Freq ~ Infl * Type * Cont + Sat * (Infl + Type + 
#>     Cont), data = housing)
#> 
#> Statistics:
#>                       X^2 df  P(> X^2)
#> Likelihood Ratio 38.66222 34 0.2671359
#> Pearson          38.90831 34 0.2582333


# multinomial model
library(nnet)
(house.mult<- multinom(Sat ~ Infl + Type + Cont, weights = Freq,
                       data = housing))
#> # weights:  24 (14 variable)
#> initial  value 1846.767257 
#> iter  10 value 1747.045232
#> final  value 1735.041933 
#> converged
#> Call:
#> multinom(formula = Sat ~ Infl + Type + Cont, data = housing, 
#>     weights = Freq)
#> 
#> Coefficients:
#>        (Intercept) InflMedium  InflHigh TypeApartment TypeAtrium TypeTerrace
#> Medium  -0.4192316  0.4464003 0.6649367    -0.4356851  0.1313663  -0.6665728
#> High    -0.1387453  0.7348626 1.6126294    -0.7356261 -0.4079808  -1.4123333
#>         ContHigh
#> Medium 0.3608513
#> High   0.4818236
#> 
#> Residual Deviance: 3470.084 
#> AIC: 3498.084 
house.mult2 <- multinom(Sat ~ Infl*Type*Cont, weights = Freq,
                        data = housing)
#> # weights:  75 (48 variable)
#> initial  value 1846.767257 
#> iter  10 value 1734.465581
#> iter  20 value 1717.220153
#> iter  30 value 1715.760679
#> iter  40 value 1715.713306
#> final  value 1715.710836 
#> converged
anova(house.mult, house.mult2)
#> Likelihood ratio tests of Multinomial Models
#> 
#> Response: Sat
#>                Model Resid. df Resid. Dev   Test    Df LR stat.   Pr(Chi)
#> 1 Infl + Type + Cont       130   3470.084                                
#> 2 Infl * Type * Cont        96   3431.422 1 vs 2    34 38.66219 0.2671367

house.pm <- predict(house.mult, expand.grid(hnames[-1]), type = "probs")
cbind(expand.grid(hnames[-1]), round(house.pm, 2))
#>      Infl      Type Cont  Low Medium High
#> 1     Low     Tower  Low 0.40   0.26 0.34
#> 2  Medium     Tower  Low 0.26   0.27 0.47
#> 3    High     Tower  Low 0.15   0.19 0.66
#> 4     Low Apartment  Low 0.54   0.23 0.23
#> 5  Medium Apartment  Low 0.39   0.26 0.34
#> 6    High Apartment  Low 0.26   0.21 0.53
#> 7     Low    Atrium  Low 0.43   0.32 0.25
#> 8  Medium    Atrium  Low 0.30   0.35 0.36
#> 9    High    Atrium  Low 0.19   0.27 0.54
#> 10    Low   Terrace  Low 0.65   0.22 0.14
#> 11 Medium   Terrace  Low 0.51   0.27 0.22
#> 12   High   Terrace  Low 0.37   0.24 0.39
#> 13    Low     Tower High 0.30   0.28 0.42
#> 14 Medium     Tower High 0.18   0.27 0.54
#> 15   High     Tower High 0.10   0.19 0.71
#> 16    Low Apartment High 0.44   0.27 0.30
#> 17 Medium Apartment High 0.30   0.28 0.42
#> 18   High Apartment High 0.18   0.21 0.61
#> 19    Low    Atrium High 0.33   0.36 0.31
#> 20 Medium    Atrium High 0.22   0.36 0.42
#> 21   High    Atrium High 0.13   0.27 0.60
#> 22    Low   Terrace High 0.55   0.27 0.19
#> 23 Medium   Terrace High 0.40   0.31 0.29
#> 24   High   Terrace High 0.27   0.26 0.47

# proportional odds model
house.cpr <- apply(house.pr, 1, cumsum)
logit <- function(x) log(x/(1-x))
house.ld <- logit(house.cpr[2, ]) - logit(house.cpr[1, ])
(ratio <- sort(drop(house.ld)))
#>  [1] 0.9357341 0.9854433 1.0573182 1.0680491 1.0772649 1.0803574 1.0824895
#>  [8] 1.0998759 1.1199975 1.1554228 1.1768138 1.1866427 1.2091541 1.2435026
#> [15] 1.2724096 1.2750171 1.2849903 1.3062598 1.3123988 1.3904715 1.4540087
#> [22] 1.4947753 1.4967585 1.6068789
mean(ratio)
#> [1] 1.223835

(house.plr <- polr(Sat ~ Infl + Type + Cont,
                   data = housing, weights = Freq))
#> Call:
#> polr(formula = Sat ~ Infl + Type + Cont, data = housing, weights = Freq)
#> 
#> Coefficients:
#>    InflMedium      InflHigh TypeApartment    TypeAtrium   TypeTerrace 
#>     0.5663937     1.2888191    -0.5723501    -0.3661866    -1.0910149 
#>      ContHigh 
#>     0.3602841 
#> 
#> Intercepts:
#>  Low|Medium Medium|High 
#>  -0.4961353   0.6907083 
#> 
#> Residual Deviance: 3479.149 
#> AIC: 3495.149 

house.pr1 <- predict(house.plr, expand.grid(hnames[-1]), type = "probs")
cbind(expand.grid(hnames[-1]), round(house.pr1, 2))
#>      Infl      Type Cont  Low Medium High
#> 1     Low     Tower  Low 0.38   0.29 0.33
#> 2  Medium     Tower  Low 0.26   0.27 0.47
#> 3    High     Tower  Low 0.14   0.21 0.65
#> 4     Low Apartment  Low 0.52   0.26 0.22
#> 5  Medium Apartment  Low 0.38   0.29 0.33
#> 6    High Apartment  Low 0.23   0.26 0.51
#> 7     Low    Atrium  Low 0.47   0.27 0.26
#> 8  Medium    Atrium  Low 0.33   0.29 0.38
#> 9    High    Atrium  Low 0.19   0.25 0.56
#> 10    Low   Terrace  Low 0.64   0.21 0.14
#> 11 Medium   Terrace  Low 0.51   0.26 0.23
#> 12   High   Terrace  Low 0.33   0.29 0.38
#> 13    Low     Tower High 0.30   0.28 0.42
#> 14 Medium     Tower High 0.19   0.25 0.56
#> 15   High     Tower High 0.10   0.17 0.72
#> 16    Low Apartment High 0.43   0.28 0.29
#> 17 Medium Apartment High 0.30   0.28 0.42
#> 18   High Apartment High 0.17   0.23 0.60
#> 19    Low    Atrium High 0.38   0.29 0.33
#> 20 Medium    Atrium High 0.26   0.27 0.47
#> 21   High    Atrium High 0.14   0.21 0.64
#> 22    Low   Terrace High 0.56   0.25 0.19
#> 23 Medium   Terrace High 0.42   0.28 0.30
#> 24   High   Terrace High 0.26   0.27 0.47

Fr <- matrix(housing$Freq, ncol  =  3, byrow = TRUE)
2*sum(Fr*log(house.pr/house.pr1))
#> [1] 9.065433

house.plr2 <- stepAIC(house.plr, ~.^2)
#> Start:  AIC=3495.15
#> Sat ~ Infl + Type + Cont
#> 
#>             Df    AIC
#> + Infl:Type  6 3484.6
#> + Type:Cont  3 3492.5
#> <none>         3495.1
#> + Infl:Cont  2 3498.9
#> - Cont       1 3507.5
#> - Type       3 3545.1
#> - Infl       2 3599.4
#> 
#> Step:  AIC=3484.64
#> Sat ~ Infl + Type + Cont + Infl:Type
#> 
#>             Df    AIC
#> + Type:Cont  3 3482.7
#> <none>         3484.6
#> + Infl:Cont  2 3488.5
#> - Infl:Type  6 3495.1
#> - Cont       1 3497.8
#> 
#> Step:  AIC=3482.69
#> Sat ~ Infl + Type + Cont + Infl:Type + Type:Cont
#> 
#>             Df    AIC
#> <none>         3482.7
#> - Type:Cont  3 3484.6
#> + Infl:Cont  2 3486.6
#> - Infl:Type  6 3492.5
house.plr2$anova
#> Stepwise Model Path 
#> Analysis of Deviance Table
#> 
#> Initial Model:
#> Sat ~ Infl + Type + Cont
#> 
#> Final Model:
#> Sat ~ Infl + Type + Cont + Infl:Type + Type:Cont
#> 
#> 
#>          Step Df  Deviance Resid. Df Resid. Dev      AIC
#> 1                               1673   3479.149 3495.149
#> 2 + Infl:Type  6 22.509347      1667   3456.640 3484.640
#> 3 + Type:Cont  3  7.945029      1664   3448.695 3482.695