Simple-CV

cv-website-header(5)
Slide 1
  • Science & Nonduality conference – California (2016)


  • Marie Curie Actions


Posts & Pages


R code

Keywords: Statistical computingOpen-source softwareBayesian analysisMarkov chain Monte Carlo methodsData visualisationLogical inferenceHypothesis testingDeductive reasoningAbductionCredibility intervalsProbability theoryDecision algorithmsNew statisticsReplication crisisCreative statistics

Search R documentation

Frequentist inference
Fixed-effects ANOVAGeneral linear models: mixing continuous and categorical covariatesOutput plot as PDFSimple linear regressionSkewness & Kurtosis
data(ToothGrowth)

## Example plot from ?ToothGrowth

coplot(len ~ dose | supp, data = ToothGrowth, panel = panel.smooth,
       xlab = "ToothGrowth data: length vs dose, given type of supplement")
## Treat dose as a factor
ToothGrowth$dose = factor(ToothGrowth$dose)
levels(ToothGrowth$dose) = c("Low", "Medium", "High")

summary(aov(len ~ supp*dose, data=ToothGrowth))


#install.packages("xtable")
library(xtable)
xtable(x, caption = NULL, label = NULL, align = NULL, digits = NULL,
       display = NULL, auto = FALSE, ...)

print(xtable(d), type="html")
print(xtable(d), type="latex") # anova table to latex
#https://cran.r-project.org/web/packages/xtable/index.html
#https://rmarkdown.rstudio.com/

data(ToothGrowth)

# model log2 of dose instead of dose directly
ToothGrowth$dose = log2(ToothGrowth$dose)

# Classical analysis for comparison
lmToothGrowth <- lm(len ~ supp + dose + supp:dose, data=ToothGrowth)
summary(lmToothGrowth)
x<- (1:5)
y<- (1:111)
pdf(file=file.choose())
hist(x)
plot(x, type='o')
dev.off()

Notes
file.choose() is a very handy command which saves the work associated with defining absolute and relative paths which can be quite cumbersome.
list.files for non-interactive selection.
choose.files for selecting multiple files interactively.
More
https://www.rdocumentation.org/packages/base/versions/3.5.2/topics/file.choose

# http://wiki.math.yorku.ca/index.php/VR4:_Chapter_1_summary
x <- seq( 1, 20, 0.5)   # sequence from 1 to 20 by 0.5
x                       # print it
w <- 1 + x/2            # a weight vector
y <- x + w*rnorm(x)     # generate y with standard deviations
                          #    equal to w
 
dum <- data.frame( x, y, w )     # make a data frame of 3 columns
dum                     # typing it's name shows the object
rm( x, y, w )           # remove the original variables


fm <- lm ( y ~ x, data = dum)   # fit a simple linear regression (between x and y)
summary( fm )                   # and look at the analysis



fm1 <- lm( y ~ x, data = dum, weight = 1/w^2 )  # a weighted regression  
summary(fm1)
 
lrf <- loess( y ~ x, dum)    # a smooth regression curve using a
                               #   modern local regression function
attach( dum )                # make the columns in the data frame visible
                               #   as variables (Note: before this command, typing x and
                               #   y would yield errors)
plot( x, y)                  # a standard scatterplot to which we will
                               #   fit regression curves 
lines( spline( x, fitted (lrf)), col = 2)  # add in the local regression using spline
                                             #  interpolation between calculated
                                             #  points
abline(0, 1, lty = 3, col = 3)    # add in the true regression line (lty=3: read line type=dotted;
                                    #   col=3: read colour=green; type "?par" for details)
abline(fm, col = 4)               # add in the unweighted regression line
abline(fm1, lty = 4, col = 5)     # add in the weighted regression line
plot( fitted(fm), resid(fm),
       xlab = "Fitted Values",
       ylab = "Residuals")          # a standard regression diagnostic plot
                                    # to check for heteroscedasticity.
                                    # The data were generated from a
                                    # heteroscedastic process. Can you see
                                    # it from this plot?
qqnorm( resid(fm) )               # Normal scores to check for skewness,
                                    #   kurtosis and outliers.  How would you
                                    #   expect heteroscedasticity to show up?
search()                          # Have a look at the search path
detach()                          # Remove the data frame from the search path
search()                          # Have another look. How did it change?
rm(fm, fm1, lrf, dum)             # Clean up (this is not that important in R 
                                    #    for reasons we will understand later)



library(rgl)
rgl.open()
rgl.bg(col='white')
x <- sort(rnorm(1000))
y <- rnorm(1000)
z1 <- atan2(x,y)
z <- rnorm(1000) + atan2(x, y)
plot3d(x, y, z1, col = rainbow(1000), size = 2)
bbox3d()
skew(x, na.rm = TRUE)
kurtosi(x, na.rm = TRUE)

#x A data.frame or matrix
#na.rm how to treat missing data

## The function is currently defined as
function (x, na.rm = TRUE) 
{
    if (length(dim(x)) == 0) {
        if (na.rm) {
            x <- x[!is.na(x)]
        			}
        
        mx <- mean(x)
         sdx <- sd(x,na.rm=na.rm)
        kurt <- sum((x - mx)^4)/(length(x) * sd(x)^4)  -3
        } else {
    
    kurt <- rep(NA,dim(x)[2])
    mx <- colMeans(x,na.rm=na.rm)
     sdx <- sd(x,na.rm=na.rm)
    for (i in 1:dim(x)[2]) {
    kurt[i] <- sum((x[,i] - mx[i])^4,  na.rm = na.rm)/((length(x[,i]) - sum(is.na(x[,i]))) *