R Programming Language Basics
Jump to navigation
Jump to search
- Set working directory
> setwd('~/Desktop/R_scripts')
- Read data
> data=read.table('Statistic.csv') > data=read.table('Skeleton.csv', header=TRUE) > data=read.table('Skeleton.csv', header=T) # Need two backslashes for Windows file system > data=read.table('C:\Documents\My R\Life Expectancy.txt') Error: '\D' is an unrecognized escape in character string starting "'C:\D" > data=read.table('C:\\Documents\\My R\\Life Expectancy.txt')
- Assign basic data to a variable
> six_grades = c(68, 64, 90, 74, 78, 93) > sort(six_grades) [1] 64 68 74 78 90 93
- Review data
> data V1 V2 1 Afghanistan 48.673 2 Albania 76.918 3 Algeria 73.131 # Show in a table format using the “table” syntax > table(data) region Amer EAP EuCA MENA SAs SSA 39 30 50 21 8 49
- Assign a specific column to variable
> LifeExp=data[,2]
- Assign all columns to variables
> attach(data) # Now refer to each column directly by its header name
- Plotting
> plot(LifeExp, xlab='Name', ylab='Score') > plot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80)) > boxplot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80)) > boxplot(LifeExp, horizontal=TRUE, xlab='Name', ylab='Score', ylim=c(60,80)) > plot(sort(six_grades), type='b', xlab='Student', ylab='Grade')
- View basic arithmetic summary data
> summary(LifeExp) Min. 1st Qu. Median Mean 3rd Qu. Max. 47.79 64.67 73.24 69.86 76.65 83.39 > summary(LifeExp, digits=6) # Show more significant digits
- Center of the data
> mean(DGDifference) [1] -14.15 > mean(DGDifference, trim=0.1) # Trim 10% from the top and bottom [1] -13.82188 > median(DGDifference) [1] -13
- Spread of data
> range(LifeExp) [1] 47.794 83.394 > max(LifeExp)-min(LifeExp) [1] 35.6 > IQR(LifeExp) # Inter-Quartile Range (range between 1st and 3rd Quartile) [1] 11.986 > var(LifeExp) # Variance [1] 93.48446 > sd(LifeExp) # Standard deviation [1] 9.668736 > sqrt(var(LifeExp)) # Same Standard deviation [1] 9.668736 > round(sd(LifeExp)) [1] 10 > round(sd(LifeExp),1) # Keep one decimal place [1] 9.7
- Variables
> orig=c(23,4,66,107,12,45) > trim=sort(orig) > trim [1] 4 12 23 45 66 107 > trim=trim[2:5] > trim [1] 12 23 45 66 > median = median(trim) > mean = mean(trim) > range = max(trim) - min(trim) > iqr = IQR(trim) > st.dev = sd(trim) > all_values = c(median, mean, range, iqr, st.dev) > combined = cbind(all_values, all_values2) # Combine two sets of data > rownames(combined) = c('median','mean','range','IQR','Std.Dev') # Rename row labels
- Shape of data
# Histogram > hist(LifeExp) > hist(LifeExp, breaks=5) # 5 buckets > hist(LifeExp, breaks=5, xlab = 'Life Expectancy (years)', main='Histogram of Life Expectancies')
- Categorical (qualitative) variables