R cheat-sheet

This page lists common R commands.

Prerequisite


Install R

Commands


Get and set the current working directory

getwd()

-- Set the working directory (use forward slashes or doubled backslashes in paths; a single backslash starts an escape sequence, and "\u" without hex digits is a parse error)
setwd("c:/users/sunildalal")

Read, Write datasets, files


-- View built-in datasets
data()	

-- Load the iris data set
data(iris)

-- View the class of the iris dataset
class(iris)

-- Save object in a file
save(iris, file="data.RData")

-- Load data from RData file
load("data.RData")

-- Write a data frame to a csv file (test.data is not defined on this page; substitute any data frame, e.g. iris)
write.csv(test.data, file = "test.csv")

-- Read data from a csv file
csv.data = read.csv("test.csv", header = TRUE, row.names=1)

-- Read data from an online csv file
iris.data = read.csv(url("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"), header = FALSE,  col.names = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"))


Read / Write to Excel

install.packages("WriteXLS")

library("WriteXLS")

WriteXLS("iris", ExcelFileName="iris.xls")

Data Manipulation

data(iris)

iris[1,]

iris[1:5,]

iris[,1]

iris[1:5, c("Sepal.Length", "Sepal.Width")]

-- Filter
iris[iris$Species=="setosa",1:5]


-- Print the row indices of the filtered data
which(iris$Species=="setosa")

iris[which(iris$Species=="setosa"),1:5]

Select Subset

R provides a subset function that selects rows (observations) of a data frame with a logical condition, and columns with the select argument.

-- How to select subset of data

custom.data = subset(iris, select=c("Sepal.Length", "Sepal.Width"))

custom.setosa = subset(iris, Species =="setosa")

setosa.filter.data= subset(iris, Petal.Length <=1.4 & Petal.Width >= 0.2, select=Species )

-- Exclude a column (Salaries is not defined on this page; substitute any data frame that has a sex column)
modifiedSalarySet <- subset(Salaries, select = -c(sex))

Merging example

Merging data involves joining two data frames into a single data frame by a common column or row name.


-- Merging example
flower.type = data.frame(Species = "setosa", Flower = "iris")

merge(flower.type, iris[1:3,], by ="Species")

How to order data

head(iris[order(iris$Sepal.Length, decreasing = TRUE),])

Substitute a string

-- Replace first
sub("s", "q", names(iris))

-- Replace All
gsub("s", "q", names(iris))

Univariate Descriptive Statistics


mean(iris$Sepal.Length)

median(iris$Sepal.Length)

sd(iris$Sepal.Length)

var(iris$Sepal.Length)

min(iris$Sepal.Length)

max(iris$Sepal.Length)



range(iris$Sepal.Length)

quantile(iris$Sepal.Length)

-- Use sapply to obtain summary statistics on every numeric attribute of the data frame

sapply(iris[1:4], mean, na.rm=TRUE)

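-- Use the summary function to display the min, 1st quartile (25th percentile), median, mean, 3rd quartile (75th percentile), and max of every numeric attribute (factor attributes are shown as counts per level)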

summary(iris)

-- Use aggregate to calculate the mean of each iris attribute, grouped by species

aggregate(x=iris[,1:4],by=list(iris$Species),FUN=mean)

-- Draw a stem-and-leaf plot to see the frequency distribution of numeric data
stem(mtcars$mpg)

-- Use a ggplot2 histogram to plot the same distribution shown by the stem-and-leaf plot
data(mtcars)
library(ggplot2)
qplot(mtcars$mpg, binwidth=2)

Correlations and multivariate analysis

data(mtcars)
data(iris)

-- Compute correlations to investigate the relationships between variables
-- Each numeric element of the matrix indicates the strength of the relationship between two variables.
cor(iris[,1:4])

-- Compute the covariance of each attribute pair in the iris dataset
-- Covariance measures how two variables vary together linearly.
cov(iris[,1:4])


-- Plot the heatmap of the correlation coefficient matrix
library(reshape2)
qplot(x=Var1, y=Var2, data=melt(cor(mtcars[1:3])), fill=value, geom="tile")

Linear Regression and multivariate analysis

data(mtcars)
data(iris)

lmfit = lm(mtcars$mpg ~ mtcars$cyl)

summary(lmfit)

-- To create an analysis-of-variance table, use the anova function
anova(lmfit)

lmfit = lm(mtcars$mpg ~ mtcars$cyl)
plot(mtcars$cyl, mtcars$mpg)
abline(lmfit)


Statistical and correlation tests

-- Statistical test: two-sample t-test (Welch by default) comparing petal width of setosa and versicolor
t.test(iris$Petal.Width[iris$Species=="setosa"],
       iris$Petal.Width[iris$Species=="versicolor"])

-- Correlation test
cor.test(iris$Sepal.Length, iris$Sepal.Width)


-- Binomial test
binom.test(x=92, n=315, p=1/6)

Data Visualization

-- Calculate the frequency of species within the iris using the table command
table.iris = table(iris$Species)

pie(table.iris)

hist(iris$Sepal.Length)

boxplot(Petal.Width ~ Species, data = iris)

plot(x=iris$Petal.Length, y=iris$Petal.Width, col=iris$Species)



Version History


Date Description
2015-08-15    Initial Version