{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Working with Data\n", "====" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Scalars\n", "----" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "n <- 3.14\n", "s <- 'c' \n", "b <- TRUE " ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "'double'" ], "text/latex": [ "'double'" ], "text/markdown": [ "'double'" ], "text/plain": [ "[1] \"double\"" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeof(n)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "'character'" ], "text/latex": [ "'character'" ], "text/markdown": [ "'character'" ], "text/plain": [ "[1] \"character\"" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeof(s)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "'logical'" ], "text/latex": [ "'logical'" ], "text/markdown": [ "'logical'" ], "text/plain": [ "[1] \"logical\"" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeof(b)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Vectors\n", "----\n", "\n", "Vectors are 1D collections of the same scalar type." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "xs <- c(1, 0.5, 0.25)\n", "ss <- c('G', 'A', 'T', 'T', 'A', 'C', 'A')\n", "bs <- c(T, T, F, F, T, T, F, F)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 1
  2. \n", "\t
  3. 0.5
  4. \n", "\t
  5. 0.25
  6. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 1\n", "\\item 0.5\n", "\\item 0.25\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 1\n", "2. 0.5\n", "3. 0.25\n", "\n", "\n" ], "text/plain": [ "[1] 1.00 0.50 0.25" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xs" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 'G'
  2. \n", "\t
  3. 'A'
  4. \n", "\t
  5. 'T'
  6. \n", "\t
  7. 'T'
  8. \n", "\t
  9. 'A'
  10. \n", "\t
  11. 'C'
  12. \n", "\t
  13. 'A'
  14. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'G'\n", "\\item 'A'\n", "\\item 'T'\n", "\\item 'T'\n", "\\item 'A'\n", "\\item 'C'\n", "\\item 'A'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'G'\n", "2. 'A'\n", "3. 'T'\n", "4. 'T'\n", "5. 'A'\n", "6. 'C'\n", "7. 'A'\n", "\n", "\n" ], "text/plain": [ "[1] \"G\" \"A\" \"T\" \"T\" \"A\" \"C\" \"A\"" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ss" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. TRUE
  2. \n", "\t
  3. TRUE
  4. \n", "\t
  5. FALSE
  6. \n", "\t
  7. FALSE
  8. \n", "\t
  9. TRUE
  10. \n", "\t
  11. TRUE
  12. \n", "\t
  13. FALSE
  14. \n", "\t
  15. FALSE
  16. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item TRUE\n", "\\item TRUE\n", "\\item FALSE\n", "\\item FALSE\n", "\\item TRUE\n", "\\item TRUE\n", "\\item FALSE\n", "\\item FALSE\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. TRUE\n", "2. TRUE\n", "3. FALSE\n", "4. FALSE\n", "5. TRUE\n", "6. TRUE\n", "7. FALSE\n", "8. FALSE\n", "\n", "\n" ], "text/plain": [ "[1] TRUE TRUE FALSE FALSE TRUE TRUE FALSE FALSE" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Extracting a single element" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "1" ], "text/latex": [ "1" ], "text/markdown": [ "1" ], "text/plain": [ "[1] 1" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xs[1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Extracting elements with a position vector" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 'A'
  2. \n", "\t
  3. 'T'
  4. \n", "\t
  5. 'T'
  6. \n", "\t
  7. 'A'
  8. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'A'\n", "\\item 'T'\n", "\\item 'T'\n", "\\item 'A'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'A'\n", "2. 'T'\n", "3. 'T'\n", "4. 'A'\n", "\n", "\n" ], "text/plain": [ "[1] \"A\" \"T\" \"T\" \"A\"" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ss[2:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Extracting elements with a logical vector" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 'G'
  2. \n", "\t
  3. 'A'
  4. \n", "\t
  5. 'A'
  6. \n", "\t
  7. 'C'
  8. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'G'\n", "\\item 'A'\n", "\\item 'A'\n", "\\item 'C'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'G'\n", "2. 'A'\n", "3. 'A'\n", "4. 'C'\n", "\n", "\n" ], "text/plain": [ "[1] \"G\" \"A\" \"A\" \"C\"" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ss[bs]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Extracting elements with a logical condition " ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 'A'
  2. \n", "\t
  3. 'T'
  4. \n", "\t
  5. 'T'
  6. \n", "\t
  7. 'A'
  8. \n", "\t
  9. 'A'
  10. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'A'\n", "\\item 'T'\n", "\\item 'T'\n", "\\item 'A'\n", "\\item 'A'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'A'\n", "2. 'T'\n", "3. 'T'\n", "4. 'A'\n", "5. 'A'\n", "\n", "\n" ], "text/plain": [ "[1] \"A\" \"T\" \"T\" \"A\" \"A\"" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ss[ss %in% c('A', 'T')]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Matrices and Arrays\n", "----\n", "\n", "Like vectors, only in 2D (matrices) or more dimensions (arrays)." ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "m <- matrix(1:12, ncol=4)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\t\n", "\t\n", "\t\n", "\n", "
1 4 710
2 5 811
3 6 912
\n" ], "text/latex": [ "\\begin{tabular}{llll}\n", "\t 1 & 4 & 7 & 10\\\\\n", "\t 2 & 5 & 8 & 11\\\\\n", "\t 3 & 6 & 9 & 12\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "1. 1\n", "2. 2\n", "3. 3\n", "4. 4\n", "5. 5\n", "6. 6\n", "7. 7\n", "8. 8\n", "9. 9\n", "10. 10\n", "11. 11\n", "12. 12\n", "\n", "\n" ], "text/plain": [ " [,1] [,2] [,3] [,4]\n", "[1,] 1 4 7 10\n", "[2,] 2 5 8 11\n", "[3,] 3 6 9 12" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 6
  2. \n", "\t
  3. 7
  4. \n", "\t
  5. 8
  6. \n", "\t
  7. 9
  8. \n", "\t
  9. 10
  10. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 6\n", "\\item 7\n", "\\item 8\n", "\\item 9\n", "\\item 10\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 6\n", "2. 7\n", "3. 8\n", "4. 9\n", "5. 10\n", "\n", "\n" ], "text/plain": [ "[1] 6 7 8 9 10" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m[6:10]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 1
  2. \n", "\t
  3. 2
  4. \n", "\t
  5. 3
  6. \n", "\t
  7. 4
  8. \n", "\t
  9. 5
  10. \n", "\t
  11. 6
  12. \n", "\t
  13. 7
  14. \n", "\t
  15. 8
  16. \n", "\t
  17. 9
  18. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 1\n", "\\item 2\n", "\\item 3\n", "\\item 4\n", "\\item 5\n", "\\item 6\n", "\\item 7\n", "\\item 8\n", "\\item 9\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 1\n", "2. 2\n", "3. 3\n", "4. 4\n", "5. 5\n", "6. 6\n", "7. 7\n", "8. 8\n", "9. 9\n", "\n", "\n" ], "text/plain": [ "[1] 1 2 3 4 5 6 7 8 9" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m[m < 10]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 2
  2. \n", "\t
  3. 5
  4. \n", "\t
  5. 8
  6. \n", "\t
  7. 11
  8. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 2\n", "\\item 5\n", "\\item 8\n", "\\item 11\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 2\n", "2. 5\n", "3. 8\n", "4. 11\n", "\n", "\n" ], "text/plain": [ "[1] 2 5 8 11" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m[2,]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 4
  2. \n", "\t
  3. 5
  4. \n", "\t
  5. 6
  6. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 4\n", "\\item 5\n", "\\item 6\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 4\n", "2. 5\n", "3. 6\n", "\n", "\n" ], "text/plain": [ "[1] 4 5 6" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m[,2]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Work!\n", "----" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Try to solve the following problems without searching the web. You can use the built-in `help()` function." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create the following $3 \\times 3$ matrix and save it in a variable called `A`.\n", "\n", "- Row 1 = 4, 5, 6\n", "- Row 2 = 1, 2, 3\n", "- Row 3 = 7, 8, 9" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What is the sum of all the numbers in A?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a vector of the column sums in `A` using the `colSums` function." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a vector of the row sums in `A` using the `apply` function." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What is the sum of the numbers in the bottom right $2 \\times 2$ block (i.e. the numbers 2, 3, 8, 9)?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Lists\n", "----" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ls <- list(dna=ss, ispurine=ss %in% c('A', 'G'))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\t
$dna
\n", "\t\t
    \n", "\t
  1. 'G'
  2. \n", "\t
  3. 'A'
  4. \n", "\t
  5. 'T'
  6. \n", "\t
  7. 'T'
  8. \n", "\t
  9. 'A'
  10. \n", "\t
  11. 'C'
  12. \n", "\t
  13. 'A'
  14. \n", "
\n", "
\n", "\t
$ispurine
\n", "\t\t
    \n", "\t
  1. TRUE
  2. \n", "\t
  3. TRUE
  4. \n", "\t
  5. FALSE
  6. \n", "\t
  7. FALSE
  8. \n", "\t
  9. TRUE
  10. \n", "\t
  11. FALSE
  12. \n", "\t
  13. TRUE
  14. \n", "
\n", "
\n", "
\n" ], "text/latex": [ "\\begin{description}\n", "\\item[\\$dna] \\begin{enumerate*}\n", "\\item 'G'\n", "\\item 'A'\n", "\\item 'T'\n", "\\item 'T'\n", "\\item 'A'\n", "\\item 'C'\n", "\\item 'A'\n", "\\end{enumerate*}\n", "\n", "\\item[\\$ispurine] \\begin{enumerate*}\n", "\\item TRUE\n", "\\item TRUE\n", "\\item FALSE\n", "\\item FALSE\n", "\\item TRUE\n", "\\item FALSE\n", "\\item TRUE\n", "\\end{enumerate*}\n", "\n", "\\end{description}\n" ], "text/markdown": [ "$dna\n", ": 1. 'G'\n", "2. 'A'\n", "3. 'T'\n", "4. 'T'\n", "5. 'A'\n", "6. 'C'\n", "7. 'A'\n", "\n", "\n", "\n", "$ispurine\n", ": 1. TRUE\n", "2. TRUE\n", "3. FALSE\n", "4. FALSE\n", "5. TRUE\n", "6. FALSE\n", "7. TRUE\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "$dna\n", "[1] \"G\" \"A\" \"T\" \"T\" \"A\" \"C\" \"A\"\n", "\n", "$ispurine\n", "[1] TRUE TRUE FALSE FALSE TRUE FALSE TRUE\n" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ls" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Extracting a sublist from a list" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "$dna =
    \n", "\t
  1. 'G'
  2. \n", "\t
  3. 'A'
  4. \n", "\t
  5. 'T'
  6. \n", "\t
  7. 'T'
  8. \n", "\t
  9. 'A'
  10. \n", "\t
  11. 'C'
  12. \n", "\t
  13. 'A'
  14. \n", "
\n" ], "text/latex": [ "\\textbf{\\$dna} = \\begin{enumerate*}\n", "\\item 'G'\n", "\\item 'A'\n", "\\item 'T'\n", "\\item 'T'\n", "\\item 'A'\n", "\\item 'C'\n", "\\item 'A'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "**$dna** = 1. 'G'\n", "2. 'A'\n", "3. 'T'\n", "4. 'T'\n", "5. 'A'\n", "6. 'C'\n", "7. 'A'\n", "\n", "\n" ], "text/plain": [ "$dna\n", "[1] \"G\" \"A\" \"T\" \"T\" \"A\" \"C\" \"A\"\n" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ls[1]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "'list'" ], "text/latex": [ "'list'" ], "text/markdown": [ "'list'" ], "text/plain": [ "[1] \"list\"" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "class(ls[1])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Extracting an element from a list" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 'G'
  2. \n", "\t
  3. 'A'
  4. \n", "\t
  5. 'T'
  6. \n", "\t
  7. 'T'
  8. \n", "\t
  9. 'A'
  10. \n", "\t
  11. 'C'
  12. \n", "\t
  13. 'A'
  14. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'G'\n", "\\item 'A'\n", "\\item 'T'\n", "\\item 'T'\n", "\\item 'A'\n", "\\item 'C'\n", "\\item 'A'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'G'\n", "2. 'A'\n", "3. 'T'\n", "4. 'T'\n", "5. 'A'\n", "6. 'C'\n", "7. 'A'\n", "\n", "\n" ], "text/plain": [ "[1] \"G\" \"A\" \"T\" \"T\" \"A\" \"C\" \"A\"" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ls$dna" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "'character'" ], "text/latex": [ "'character'" ], "text/markdown": [ "'character'" ], "text/plain": [ "[1] \"character\"" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "class(ls$dna)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 'G'
  2. \n", "\t
  3. 'A'
  4. \n", "\t
  5. 'T'
  6. \n", "\t
  7. 'T'
  8. \n", "\t
  9. 'A'
  10. \n", "\t
  11. 'C'
  12. \n", "\t
  13. 'A'
  14. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'G'\n", "\\item 'A'\n", "\\item 'T'\n", "\\item 'T'\n", "\\item 'A'\n", "\\item 'C'\n", "\\item 'A'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'G'\n", "2. 'A'\n", "3. 'T'\n", "4. 'T'\n", "5. 'A'\n", "6. 'C'\n", "7. 'A'\n", "\n", "\n" ], "text/plain": [ "[1] \"G\" \"A\" \"T\" \"T\" \"A\" \"C\" \"A\"" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ls[[1]]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "'character'" ], "text/latex": [ "'character'" ], "text/markdown": [ "'character'" ], "text/plain": [ "[1] \"character\"" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "class(ls[[1]])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Data frames\n", "----\n", "\n", "A data frame is a special list of vectors where all the vectors have the same length. Because all the vectors have the same length, it can also be thought of as a 2D table or matrix and manipulated in the same way." 
] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df <- as.data.frame(ls)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "'list'" ], "text/latex": [ "'list'" ], "text/markdown": [ "'list'" ], "text/plain": [ "[1] \"list\"" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "class(ls)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "'data.frame'" ], "text/latex": [ "'data.frame'" ], "text/markdown": [ "'data.frame'" ], "text/plain": [ "[1] \"data.frame\"" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "class(df)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
dnaispurine
1GTRUE
2ATRUE
3TFALSE
4TFALSE
5ATRUE
6CFALSE
7ATRUE
\n" ], "text/latex": [ "\\begin{tabular}{r|ll}\n", " & dna & ispurine\\\\\n", "\\hline\n", "\t1 & G & TRUE\\\\\n", "\t2 & A & TRUE\\\\\n", "\t3 & T & FALSE\\\\\n", "\t4 & T & FALSE\\\\\n", "\t5 & A & TRUE\\\\\n", "\t6 & C & FALSE\\\\\n", "\t7 & A & TRUE\\\\\n", "\\end{tabular}\n" ], "text/plain": [ " dna ispurine\n", "1 G TRUE\n", "2 A TRUE\n", "3 T FALSE\n", "4 T FALSE\n", "5 A TRUE\n", "6 C FALSE\n", "7 A TRUE" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\n", "
dnaispurine
4TFALSE
5ATRUE
6CFALSE
\n" ], "text/latex": [ "\\begin{tabular}{r|ll}\n", " & dna & ispurine\\\\\n", "\\hline\n", "\t4 & T & FALSE\\\\\n", "\t5 & A & TRUE\\\\\n", "\t6 & C & FALSE\\\\\n", "\\end{tabular}\n" ], "text/plain": [ " dna ispurine\n", "4 T FALSE\n", "5 A TRUE\n", "6 C FALSE" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[4:6, ]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. TRUE
  2. \n", "\t
  3. TRUE
  4. \n", "\t
  5. FALSE
  6. \n", "\t
  7. FALSE
  8. \n", "\t
  9. TRUE
  10. \n", "\t
  11. FALSE
  12. \n", "\t
  13. TRUE
  14. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item TRUE\n", "\\item TRUE\n", "\\item FALSE\n", "\\item FALSE\n", "\\item TRUE\n", "\\item FALSE\n", "\\item TRUE\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. TRUE\n", "2. TRUE\n", "3. FALSE\n", "4. FALSE\n", "5. TRUE\n", "6. FALSE\n", "7. TRUE\n", "\n", "\n" ], "text/plain": [ "[1] TRUE TRUE FALSE FALSE TRUE FALSE TRUE" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df$ispurine" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
dnaispurine
1GTRUE
2ATRUE
5ATRUE
7ATRUE
\n" ], "text/latex": [ "\\begin{tabular}{r|ll}\n", " & dna & ispurine\\\\\n", "\\hline\n", "\t1 & G & TRUE\\\\\n", "\t2 & A & TRUE\\\\\n", "\t5 & A & TRUE\\\\\n", "\t7 & A & TRUE\\\\\n", "\\end{tabular}\n" ], "text/plain": [ " dna ispurine\n", "1 G TRUE\n", "2 A TRUE\n", "5 A TRUE\n", "7 A TRUE" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df$ispurine, ]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating a data frame from scratch\n", "----" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true }, "outputs": [], "source": [ "gender <- c('M', 'M', 'F', 'F', 'M', 'F', 'M')\n", "height <- c(1.65, 1.82, 1.56, 1.66, 1.72, 1.6, 1.8)\n", "weight <- c(65, 102, 55, 46, 78, 60, 72)\n", "\n", "bods <- data.frame(gender, height, weight)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
genderheightweight
1M1.6565
2M1.82102
3F1.5655
4F1.6646
5M1.7278
6F1.660
7M1.872
\n" ], "text/latex": [ "\\begin{tabular}{r|lll}\n", " & gender & height & weight\\\\\n", "\\hline\n", "\t1 & M & 1.65 & 65\\\\\n", "\t2 & M & 1.82 & 102\\\\\n", "\t3 & F & 1.56 & 55\\\\\n", "\t4 & F & 1.66 & 46\\\\\n", "\t5 & M & 1.72 & 78\\\\\n", "\t6 & F & 1.6 & 60\\\\\n", "\t7 & M & 1.8 & 72\\\\\n", "\\end{tabular}\n" ], "text/plain": [ " gender height weight\n", "1 M 1.65 65\n", "2 M 1.82 102\n", "3 F 1.56 55\n", "4 F 1.66 46\n", "5 M 1.72 78\n", "6 F 1.60 60\n", "7 M 1.80 72" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bods" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can add a new calculated column easily. Let's include the body mass index (bmi)." ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": true }, "outputs": [], "source": [ "bods$bmi <- bods$weight/bods$height^2" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
genderheightweightbmi
1M1.656523.87511
2M1.8210230.79338
3F1.565522.60026
4F1.664616.69328
5M1.727826.3656
6F1.66023.4375
7M1.87222.22222
\n" ], "text/latex": [ "\\begin{tabular}{r|llll}\n", " & gender & height & weight & bmi\\\\\n", "\\hline\n", "\t1 & M & 1.65 & 65 & 23.87511\\\\\n", "\t2 & M & 1.82 & 102 & 30.79338\\\\\n", "\t3 & F & 1.56 & 55 & 22.60026\\\\\n", "\t4 & F & 1.66 & 46 & 16.69328\\\\\n", "\t5 & M & 1.72 & 78 & 26.3656\\\\\n", "\t6 & F & 1.6 & 60 & 23.4375\\\\\n", "\t7 & M & 1.8 & 72 & 22.22222\\\\\n", "\\end{tabular}\n" ], "text/plain": [ " gender height weight bmi\n", "1 M 1.65 65 23.87511\n", "2 M 1.82 102 30.79338\n", "3 F 1.56 55 22.60026\n", "4 F 1.66 46 16.69328\n", "5 M 1.72 78 26.36560\n", "6 F 1.60 60 23.43750\n", "7 M 1.80 72 22.22222" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bods" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's get rid of the bmi column." ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": true }, "outputs": [], "source": [ "bods$bmi <- NULL" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
genderheightweight
1M1.6565
2M1.82102
3F1.5655
4F1.6646
5M1.7278
6F1.660
7M1.872
\n" ], "text/latex": [ "\\begin{tabular}{r|lll}\n", " & gender & height & weight\\\\\n", "\\hline\n", "\t1 & M & 1.65 & 65\\\\\n", "\t2 & M & 1.82 & 102\\\\\n", "\t3 & F & 1.56 & 55\\\\\n", "\t4 & F & 1.66 & 46\\\\\n", "\t5 & M & 1.72 & 78\\\\\n", "\t6 & F & 1.6 & 60\\\\\n", "\t7 & M & 1.8 & 72\\\\\n", "\\end{tabular}\n" ], "text/plain": [ " gender height weight\n", "1 M 1.65 65\n", "2 M 1.82 102\n", "3 F 1.56 55\n", "4 F 1.66 46\n", "5 M 1.72 78\n", "6 F 1.60 60\n", "7 M 1.80 72" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bods" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Work!\n", "----" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "How many males are there?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What is the mean height?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What is the mean weight for females?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A person is classified as obese if their BMI exceeds 30. Add the BMI column back into the data frame, as well as a new logical column `is.obese` indicating if a person is obese or not." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Reading data from files or URLs to dataframes\n", "----\n", "\n", "See [Examples from the Quick-R website](http://www.statmethods.net/input/importingdata.html)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.1.2" } }, "nbformat": 4, "nbformat_minor": 0 }