Difference between revisions of "R Programming Language Basics"

From Ittichai Chammavanijakul's Wiki
Jump to navigation Jump to search
(Created page with "* Set working directory <pre> > setwd('~/Desktop/R_scripts') </pre> * Read data <pre> > data=read.table('Statistic.csv') </pre> * Review data <pre> > data 1 A 2.0 2 B ...")
 
 
(One intermediate revision by the same user not shown)
Line 7: Line 7:
 
<pre>
 
<pre>
 
> data=read.table('Statistic.csv')
 
> data=read.table('Statistic.csv')
 +
> data=read.table('Skeleton.csv', header=TRUE)
 +
> data=read.table('Skeleton.csv', header=T)
 +
 +
# Need two backslashes for Windows file system
 +
> data=read.table('C:\Documents\My R\Life Expectancy.txt')
 +
Error: '\D' is an unrecognized escape in character string starting "'C:\D"
 +
 +
> data=read.table('C:\\Documents\\My R\\Life Expectancy.txt')
 +
</pre>
 +
 +
* Assign basic data to a variable
 +
<pre>
 +
> six_grades = c(68, 64, 90, 74, 78, 93)
 +
> sort(six_grades)
 +
[1] 64 68 74 78 90 93
 
</pre>
 
</pre>
  
Line 12: Line 27:
 
<pre>
 
<pre>
 
> data
 
> data
1   A    2.0
+
                                  V1    V2
2    B    3.0
+
1                       Afghanistan 48.673
3   C    4.0
+
2                           Albania 76.918
 +
3                           Algeria 73.131
 +
 
 +
# Show in a table format using the “table” syntax
 +
> table(data)
 +
region
 +
Amer  EAP EuCA MENA  SAs  SSA
 +
  39  30  50  21   8  49
 +
 
 
</pre>
 
</pre>
  
 
* Assign a specific column to variable
 
* Assign a specific column to variable
 
<pre>
 
<pre>
> grade=data[,2]
+
> LifeExp=data[,2]
 +
</pre>
 +
 
 +
* Assign all columns to variables
 +
<pre>
 +
> attach(data)
 +
# Now refer to each column directly by its header (column) name
 
</pre>
 
</pre>
  
 
* Plotting
 
* Plotting
 
<pre>
 
<pre>
> plot(grade, xlab='Name', ylab='Score', ylim=c(1,3)
+
> plot(LifeExp, xlab='Name', ylab='Score')
> boxplot(grade, xlab='Name', ylab='Score', ylim=c(1,3)
+
> plot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80))
 +
> boxplot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80))
 +
> boxplot(LifeExp, horizontal=TRUE, xlab='Name', ylab='Score', ylim=c(60,80))
 +
 
 +
> plot(sort(six_grades), type='b', xlab='Student', ylab='Grade')
 +
 
 
</pre>
 
</pre>
  
 
* View basic arithmetic summary data
 
* View basic arithmetic summary data
 
<pre>
 
<pre>
> summary
+
> summary(LifeExp)
Min.     
+
  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
 +
  47.79  64.67  73.24  69.86  76.65  83.39 
 +
 
 +
> summary(LifeExp, digits=6)   # Show more significant digits
 +
</pre>
 +
 
 +
* Center of the data
 +
<pre>
 +
> mean(DGDifference)
 +
[1] -14.15
 +
> mean(DGDifference, trim=0.1)        # Trim 10% from the top and bottom
 +
[1] -13.82188
 +
> median(DGDifference)
 +
[1] -13
 
</pre>
 
</pre>
  
[[Category: Languages]]
+
* Spread of data
 +
<pre>
 +
> range(LifeExp)
 +
[1] 47.794 83.394
 +
> max(LifeExp)-min(LifeExp)
 +
[1] 35.6
 +
> IQR(LifeExp)    # Inter-Quartile Range (range between 1st and 3rd Quartile)
 +
[1] 11.986
 +
> var(LifeExp)    # Variance
 +
[1] 93.48446
 +
> sd(LifeExp)        # Standard deviation
 +
[1] 9.668736
 +
> sqrt(var(LifeExp))  # Same Standard deviation
 +
[1] 9.668736
 +
 
 +
> round(sd(LifeExp))
 +
[1] 10
 +
> round(sd(LifeExp),1)    # Keep one decimal place
 +
[1] 9.7
 +
 
 +
</pre>
 +
 
 +
* Variables
 +
<pre>
 +
> orig=c(23,4,66,107,12,45)
 +
> trim=sort(orig)
 +
> trim
 +
[1]  4  12  23  45  66 107
 +
> trim=trim[2:5]
 +
> trim
 +
[1] 12 23 45 66
 +
 
 +
> median = median(trim)
 +
> mean = mean(trim)
 +
> range = max(trim) - min(trim)
 +
> iqr = IQR(trim)
 +
> st.dev = sd(trim)
 +
> all_values = c(median, mean, range, iqr, st.dev)
 +
> combined = cbind(all_values, all_values2)    # Combine two sets of data
 +
> rownames(combined) = c('median','mean','range','IQR','Std.Dev') # Rename row labels
 +
 
 +
</pre>
 +
 
 +
* Shape of data
 +
<pre>
 +
# Histogram
 +
> hist(LifeExp)
 +
> hist(LifeExp, breaks=5)    # 5 buckets
 +
> hist(LifeExp, breaks=5, xlab = 'Life Expectancy (years)', main='Histogram of Life Expectancies')
 +
 
 +
</pre>
 +
 
 +
* Categorical (qualitative) variables
 +
<pre>
 +
 
 +
</pre>
 +
[[Category: R]]

Latest revision as of 10:50, 9 March 2014

  • Set working directory
> setwd('~/Desktop/R_scripts')
  • Read data
> data=read.table('Statistic.csv')
> data=read.table('Skeleton.csv', header=TRUE)
> data=read.table('Skeleton.csv', header=T)

# Need two backslashes for Windows file system
> data=read.table('C:\Documents\My R\Life Expectancy.txt')
Error: '\D' is an unrecognized escape in character string starting "'C:\D"

> data=read.table('C:\\Documents\\My R\\Life Expectancy.txt')
  • Assign basic data to a variable
> six_grades = c(68, 64, 90, 74, 78, 93)
> sort(six_grades)
[1] 64 68 74 78 90 93
  • Review data
> data
                                  V1     V2
1                        Afghanistan 48.673
2                            Albania 76.918
3                            Algeria 73.131

# Show in a table format using the “table” syntax
> table(data)
region
Amer  EAP EuCA MENA  SAs  SSA 
  39   30   50   21    8   49

  • Assign a specific column to variable
> LifeExp=data[,2]
  • Assign all columns to variables
> attach(data)
# Now refer to each column directly by its header (column) name
  • Plotting
> plot(LifeExp, xlab='Name', ylab='Score')
> plot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80))
> boxplot(LifeExp, xlab='Name', ylab='Score', ylim=c(60,80))
> boxplot(LifeExp, horizontal=TRUE, xlab='Name', ylab='Score', ylim=c(60,80))

> plot(sort(six_grades), type='b', xlab='Student', ylab='Grade')

  • View basic arithmetic summary data
> summary(LifeExp)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  47.79   64.67   73.24   69.86   76.65   83.39  

> summary(LifeExp, digits=6)    # Show more significant digits
  • Center of the data
> mean(DGDifference)
[1] -14.15
> mean(DGDifference, trim=0.1)        # Trim 10% from the top and bottom
[1] -13.82188
> median(DGDifference)
[1] -13
  • Spread of data
> range(LifeExp)
[1] 47.794 83.394
> max(LifeExp)-min(LifeExp)
[1] 35.6
> IQR(LifeExp)    # Inter-Quartile Range (range between 1st and 3rd Quartile)
[1] 11.986
> var(LifeExp)    # Variance 
[1] 93.48446
> sd(LifeExp)        # Standard deviation
[1] 9.668736
> sqrt(var(LifeExp))   # Same Standard deviation
[1] 9.668736

> round(sd(LifeExp))
[1] 10
> round(sd(LifeExp),1)    # Keep one decimal place
[1] 9.7

  • Variables
> orig=c(23,4,66,107,12,45)
> trim=sort(orig)
> trim
[1]   4  12  23  45  66 107
> trim=trim[2:5]
> trim
[1] 12 23 45 66

> median = median(trim)
> mean = mean(trim)
> range = max(trim) - min(trim)
> iqr = IQR(trim)
> st.dev = sd(trim)
> all_values = c(median, mean, range, iqr, st.dev)
> combined = cbind(all_values, all_values2)    # Combine two sets of data
> rownames(combined) = c('median','mean','range','IQR','Std.Dev') # Rename row labels

  • Shape of data
# Histogram
> hist(LifeExp)
> hist(LifeExp, breaks=5)     # 5 buckets
> hist(LifeExp, breaks=5, xlab = 'Life Expectancy (years)', main='Histogram of Life Expectancies')

  • Categorical (qualitative) variables