R Programming Language Basics

From Ittichai Chammavanijakul's Wiki
Jump to navigation Jump to search
  • Set working directory
> setwd('~/Desktop/R_scripts')
  • Read data
> data=read.table('Statistic.csv')
> data=read.table('Skeleton.csv', header=TRUE)
> data=read.table('Skeleton.csv', header=T)

# Windows paths need doubled backslashes, because a single backslash starts an escape sequence in an R string
> data=read.table('C:\Documents\My R\Life Expectancy.txt')
Error: '\D' is an unrecognized escape in character string starting "'C:\D"

> data=read.table('C:\\Documents\\My R\\Life Expectancy.txt')
  • Assign basic data to a variable
> six_grades = c(68, 64, 90, 74, 78, 93)
> sort(six_grades)
[1] 64 68 74 78 90 93
  • Review data
> data
                                  V1     V2
1                        Afghanistan 48.673
2                            Albania 76.918
3                            Algeria 73.131

# Show the count of each value in table form using the table() function
> table(data)
region
Amer  EAP EuCA MENA  SAs  SSA 
  39   30   50   21    8   49

  • Assign a specific column to variable
> LifeExp=data[,2]
  • Assign all columns to variables
> attach(data)
# Now refer to each column by its name, which is taken from the header row
  • Plotting
> plot(LifeExp, xlab='Name', ylab='Score')
> plot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80))
> boxplot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80))
> boxplot(LifeExp, horizontal=TRUE, xlab='Name', ylab='Score', ylim=c(60,80))

> plot(sort(six_grades), type='b', xlab='Student', ylab='Grade')

  • View basic summary statistics
> summary(LifeExp)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  47.79   64.67   73.24   69.86   76.65   83.39  

> summary(LifeExp, digits=6)    # Show more significant digits
  • Center of the data
> mean(DGDifference)
[1] -14.15
> mean(DGDifference, trim=0.1)        # Trim 10% from the top and bottom
[1] -13.82188
> median(DGDifference)
[1] -13
  • Spread of data
> range(LifeExp)
[1] 47.794 83.394
> max(LifeExp)-min(LifeExp)
[1] 35.6
> IQR(LifeExp)    # Inter-Quartile Range (range between 1st and 3rd Quartile)
[1] 11.986
> var(LifeExp)    # Variance 
[1] 93.48446
> sd(LifeExp)        # Standard deviation
[1] 9.668736
> sqrt(var(LifeExp))   # Same Standard deviation
[1] 9.668736

> round(sd(LifeExp))
[1] 10
> round(sd(LifeExp),1)    # Keep one decimal place
[1] 9.7

  • Variables
> orig=c(23,4,66,107,12,45)
> trim=sort(orig)
> trim
[1]   4  12  23  45  66 107
> trim=trim[2:5]
> trim
[1] 12 23 45 66

> median = median(trim)
> mean = mean(trim)
> range = max(trim) - min(trim)
> iqr = IQR(trim)
> st.dev = sd(trim)
> all_values = c(median, mean, range, iqr, st.dev)
> combined = cbind(all_values, all_values2)    # Combine two sets of data
> rownames(combined) = c('median','mean','range','IQR','Std.Dev') # Rename row labels

  • Shape of data
# Histogram
> hist(LifeExp)
> hist(LifeExp, breaks=5)     # 5 buckets
> hist(LifeExp, breaks=5, xlab = 'Life Expectancy (years)', main='Histogram of Life Expectancies')

  • Categorical (qualitative) variables