R Programming Language Basics

> setwd('~/Desktop/R_scripts')
 * Set working directory

> data=read.table('Statistic.csv') > data=read.table('Skeleton.csv', header=TRUE) > data=read.table('Skeleton.csv', header=T)
 * Read data

> data=read.table('C:\Documents\My R\Life Expectancy.txt') Error: '\D' is an unrecognized escape in character string starting "'C:\D"
 * 1) Windows file paths need doubled backslashes (forward slashes also work)

> data=read.table('C:\\Documents\\My R\\Life Expectancy.txt')

> six_grades = c(68, 64, 90, 74, 78, 93) > sort(six_grades) [1] 64 68 74 78 90 93
 * Assign basic data to a variable

> data V1    V2 1                        Afghanistan 48.673 2                           Albania 76.918 3                           Algeria 73.131
 * Review data

> table(data) region Amer EAP EuCA MENA  SAs  SSA 39  30   50   21    8   49
 * 1) Show counts in a table format using the “table” function

> LifeExp=data[,2]
 * Assign a specific column to variable

> attach(data)
 * Make all columns available as variables
 * 1) Now refer to each column by its variable name, which is the column's header name

> plot(LifeExp, xlab='Name', ylab='Score') > plot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80)) > boxplot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80)) > boxplot(LifeExp, horizontal=TRUE, xlab='Name', ylab='Score', ylim=c(60,80))
 * Plotting

> plot(sort(six_grades), type='b', xlab='Student', ylab='Grade')

> summary(LifeExp) Min. 1st Qu. Median   Mean 3rd Qu. Max. 47.79  64.67   73.24   69.86   76.65   83.39
 * View basic arithmetic summary data

> summary(LifeExp, digits=6)   # Show more significant digits

> mean(DGDifference) [1] -14.15 > mean(DGDifference, trim=0.1)       # Trim 10% from the top and bottom [1] -13.82188 > median(DGDifference) [1] -13
 * Center of the data

> range(LifeExp) [1] 47.794 83.394 > max(LifeExp)-min(LifeExp) [1] 35.6 > IQR(LifeExp)   # Inter-Quartile Range (range between 1st and 3rd Quartile) [1] 11.986 > var(LifeExp)   # Variance [1] 93.48446 > sd(LifeExp)       # Standard deviation [1] 9.668736 > sqrt(var(LifeExp))  # Same Standard deviation [1] 9.668736
 * Spread of data

> round(sd(LifeExp)) [1] 10 > round(sd(LifeExp),1)   # Keep one decimal place [1] 9.7

> orig=c(23,4,66,107,12,45) > trim=sort(orig) > trim [1]  4  12  23  45  66 107 > trim=trim[2:5] > trim [1] 12 23 45 66
 * Variables

> median = median(trim) > mean = mean(trim) > range = max(trim) - min(trim) > iqr = IQR(trim) > st.dev = sd(trim) > all_values = c(median, mean, range, iqr, st.dev) > combined = cbind(all_values, all_values2)   # Combine two sets of data > rownames(combined) = c('median','mean','range','IQR','Std.Dev') # Rename row labels

> hist(LifeExp) > hist(LifeExp, breaks=5)    # 5 buckets > hist(LifeExp, breaks=5, xlab = 'Life Expectancy (years)', main='Histogram of Life Expectancies')
 * Shape of data
 * 1) Histogram


 * Categorical (qualitative) variables