# Arithmetic at the prompt; lines starting with '##' show the printed result.
1 + 2
## [1] 3
x <- c(1, 2, 3) # c() combines its arguments into one vector
1:3             # sequence of integers from 1 to 3
## [1] 1 2 3
x + c(4, 5, 6)  # vectorized: element-by-element addition
## [1] 5 7 9
x + 4           # recycling: the single value 4 is reused for every element
## [1] 5 6 7
Vectors
numeric(), character(), logical(), integer(), complex(), …
NA: ‘not available’
factor(): values from restricted set of ‘levels’.
Operations
==, <, <=, >, >=, …
| (or), & (and), ! (not)
[, e.g., x[c(2, 3)]
[<-, e.g., x[c(1, 3)] = x[c(1, 3)]
is.na()
Functions
# Simulate 100 standard-normal draws and a response that is x plus noise.
# No seed is set, so the values differ from run to run.
x <- rnorm(100)
y <- x + rnorm(100)  # add independent standard-normal noise to x
plot(x, y)           # scatter plot of y against x
data.frame
# Bundle the two vectors as named columns of a data.frame.
df <- data.frame(Independent = x, Dependent = y)
head(df)  # show the first rows (six here)
##   Independent  Dependent
## 1   0.1709466 -1.9343537
## 2  -1.0432251 -1.4366633
## 3   0.1874114 -0.6666756
## 4  -1.0410644 -1.9896486
## 5   0.7765771  1.9752691
## 6  -1.4374832 -1.6424475
df[1:5, 1:2]  # two-dimensional subsetting: rows 1 to 5, columns 1 to 2
##   Independent  Dependent
## 1   0.1709466 -1.9343537
## 2  -1.0432251 -1.4366633
## 3   0.1874114 -0.6666756
## 4  -1.0410644 -1.9896486
## 5   0.7765771  1.9752691
df[1:5, ]  # an empty column index keeps every column
##   Independent  Dependent
## 1   0.1709466 -1.9343537
## 2  -1.0432251 -1.4366633
## 3   0.1874114 -0.6666756
## 4  -1.0410644 -1.9896486
## 5   0.7765771  1.9752691
# The formula names columns of df: left-hand side on the y-axis,
# right-hand side on the x-axis.
plot(Dependent ~ Independent, df)  # 'formula' interface
df[, 1], df[, "Independent"], df[[1]], df[["Independent"]], df$Independent
Exercise: plot only values with Dependent > 0, Independent > 0
Select rows
# Logical index: TRUE for the rows where both columns are positive.
ridx <- (df$Dependent > 0) & (df$Independent > 0)
# Plot subset
plot(Dependent ~ Independent, df[ridx, ])
Skin the cat another way
# Same plot without a separate index vector: the subset expression is
# evaluated against the columns of df.
plot(
    Dependent ~ Independent,
    data = df,
    subset = Dependent > 0 & Independent > 0
)
# Regress Dependent on Independent, taking both variables from df.
fit <- lm(Dependent ~ Independent, df)  # linear model -- regression
anova(fit)                              # summary table
## Analysis of Variance Table
## 
## Response: Dependent
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## Independent  1 118.609 118.609  118.03 < 2.2e-16 ***
## Residuals   98  98.483   1.005                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Redraw the scatter plot, then add the line described by the fitted model.
plot(Dependent ~ Independent, df)
abline(fit)
lm(): plain-old function
fit: an object of class “lm”
anova(): a generic with a specific method for class “lm”
class(fit)
## [1] "lm"
# List the methods that are defined for objects of class "lm".
methods(class="lm")
##  [1] add1           alias          anova          case.names    
##  [5] coerce         confint        cooks.distance deviance      
##  [9] dfbeta         dfbetas        drop1          dummy.coef    
## [13] effects        extractAIC     family         formula       
## [17] hatvalues      influence      initialize     kappa         
## [21] labels         logLik         model.frame    model.matrix  
## [25] nobs           plot           predict        print         
## [29] proj           qr             residuals      rstandard     
## [33] rstudent       show           simulate       slotsFromS3   
## [37] summary        variable.names vcov          
## see '?methods' for accessing help and source code
# Open help pages: one for the function/generic, one for a specific method.
?"plot"          # plain-old-function or generic
?"plot.formula"  # method
# Scatter plot of df with a linear-model smooth, built up from ggplot2 layers.
library(ggplot2)
ggplot(data = df, mapping = aes(x = Independent, y = Dependent)) +
    geom_point() +
    geom_smooth(method = "lm")
library(ggplot2) (once per session)