{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Set up environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading tidyverse: ggplot2\n",
      "Loading tidyverse: tibble\n",
      "Loading tidyverse: tidyr\n",
      "Loading tidyverse: readr\n",
      "Loading tidyverse: purrr\n",
      "Loading tidyverse: dplyr\n",
      "Conflicts with tidy packages ---------------------------------------------------\n",
      "filter(): dplyr, stats\n",
      "lag():    dplyr, stats\n",
      "\n",
      "Attaching package: ‘foreach’\n",
      "\n",
      "The following objects are masked from ‘package:purrr’:\n",
      "\n",
      "    accumulate, when\n",
      "\n",
      "Loading required package: S4Vectors\n",
      "Loading required package: stats4\n",
      "Loading required package: BiocGenerics\n",
      "Loading required package: parallel\n",
      "\n",
      "Attaching package: ‘BiocGenerics’\n",
      "\n",
      "The following objects are masked from ‘package:parallel’:\n",
      "\n",
      "    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\n",
      "    clusterExport, clusterMap, parApply, parCapply, parLapply,\n",
      "    parLapplyLB, parRapply, parSapply, parSapplyLB\n",
      "\n",
      "The following objects are masked from ‘package:dplyr’:\n",
      "\n",
      "    combine, intersect, setdiff, union\n",
      "\n",
      "The following objects are masked from ‘package:stats’:\n",
      "\n",
      "    IQR, mad, sd, var, xtabs\n",
      "\n",
      "The following objects are masked from ‘package:base’:\n",
      "\n",
      "    anyDuplicated, append, as.data.frame, cbind, colMeans, colnames,\n",
      "    colSums, do.call, duplicated, eval, evalq, Filter, Find, get, grep,\n",
      "    grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match,\n",
      "    mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,\n",
      "    rbind, Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,\n",
      "    table, tapply, union, unique, unsplit, which, which.max, which.min\n",
      "\n",
      "\n",
      "Attaching package: ‘S4Vectors’\n",
      "\n",
      "The following objects are masked from ‘package:dplyr’:\n",
      "\n",
      "    first, rename\n",
      "\n",
      "The following object is masked from ‘package:tidyr’:\n",
      "\n",
      "    expand\n",
      "\n",
      "The following object is masked from ‘package:base’:\n",
      "\n",
      "    expand.grid\n",
      "\n",
      "Loading required package: IRanges\n",
      "\n",
      "Attaching package: ‘IRanges’\n",
      "\n",
      "The following objects are masked from ‘package:dplyr’:\n",
      "\n",
      "    collapse, desc, slice\n",
      "\n",
      "The following object is masked from ‘package:purrr’:\n",
      "\n",
      "    reduce\n",
      "\n",
      "Loading required package: GenomicRanges\n",
      "Loading required package: GenomeInfoDb\n",
      "Loading required package: SummarizedExperiment\n",
      "Loading required package: Biobase\n",
      "Welcome to Bioconductor\n",
      "\n",
      "    Vignettes contain introductory material; view with\n",
      "    'browseVignettes()'. To cite Bioconductor, see\n",
      "    'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n",
      "\n",
      "Loading required package: DelayedArray\n",
      "Loading required package: matrixStats\n",
      "\n",
      "Attaching package: ‘matrixStats’\n",
      "\n",
      "The following objects are masked from ‘package:Biobase’:\n",
      "\n",
      "    anyMissing, rowMedians\n",
      "\n",
      "The following object is masked from ‘package:dplyr’:\n",
      "\n",
      "    count\n",
      "\n",
      "\n",
      "Attaching package: ‘DelayedArray’\n",
      "\n",
      "The following objects are masked from ‘package:matrixStats’:\n",
      "\n",
      "    colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges\n",
      "\n",
      "The following object is masked from ‘package:base’:\n",
      "\n",
      "    apply\n",
      "\n",
      "\n",
      "Attaching package: ‘limma’\n",
      "\n",
      "The following object is masked from ‘package:DESeq2’:\n",
      "\n",
      "    plotMA\n",
      "\n",
      "The following object is masked from ‘package:BiocGenerics’:\n",
      "\n",
      "    plotMA\n",
      "\n",
      "\n",
      "Attaching package: ‘gridExtra’\n",
      "\n",
      "The following object is masked from ‘package:Biobase’:\n",
      "\n",
      "    combine\n",
      "\n",
      "The following object is masked from ‘package:BiocGenerics’:\n",
      "\n",
      "    combine\n",
      "\n",
      "The following object is masked from ‘package:dplyr’:\n",
      "\n",
      "    combine\n",
      "\n",
      "\n",
      "---------------------\n",
      "Welcome to dendextend version 1.8.0\n",
      "Type citation('dendextend') for how to cite the package.\n",
      "\n",
      "Type browseVignettes(package = 'dendextend') for the package vignette.\n",
      "The github page is: https://github.com/talgalili/dendextend/\n",
      "\n",
      "Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues\n",
      "Or contact: <tal.galili@gmail.com>\n",
      "\n",
      "\tTo suppress this message use:  suppressPackageStartupMessages(library(dendextend))\n",
      "---------------------\n",
      "\n",
      "\n",
      "Attaching package: ‘dendextend’\n",
      "\n",
      "The following object is masked from ‘package:stats’:\n",
      "\n",
      "    cutree\n",
      "\n",
      "\n",
      "Attaching package: ‘plotly’\n",
      "\n",
      "The following object is masked from ‘package:IRanges’:\n",
      "\n",
      "    slice\n",
      "\n",
      "The following object is masked from ‘package:S4Vectors’:\n",
      "\n",
      "    rename\n",
      "\n",
      "The following object is masked from ‘package:ggplot2’:\n",
      "\n",
      "    last_plot\n",
      "\n",
      "The following object is masked from ‘package:stats’:\n",
      "\n",
      "    filter\n",
      "\n",
      "The following object is masked from ‘package:graphics’:\n",
      "\n",
      "    layout\n",
      "\n"
     ]
    }
   ],
   "source": [
    "source(\"pilot_config.R\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get files of all star output files\n",
    "stardirs <- list.files(DATDIR)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read in the count data output from STAR"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There are 204 files under this folder. Each file is generated from a fastq file using STAR."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "204"
      ],
      "text/latex": [
       "204"
      ],
      "text/markdown": [
       "204"
      ],
      "text/plain": [
       "[1] 204"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "length(stardirs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<ol class=list-inline>\n",
       "\t<li>'1_MA_J_S18_L001_ReadsPerGene.out.tab'</li>\n",
       "\t<li>'1_MA_J_S18_L002_ReadsPerGene.out.tab'</li>\n",
       "\t<li>'1_MA_J_S18_L003_ReadsPerGene.out.tab'</li>\n",
       "\t<li>'1_MA_J_S18_L004_ReadsPerGene.out.tab'</li>\n",
       "\t<li>'1_RZ_J_S26_L001_ReadsPerGene.out.tab'</li>\n",
       "\t<li>'1_RZ_J_S26_L002_ReadsPerGene.out.tab'</li>\n",
       "</ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item '1\\_MA\\_J\\_S18\\_L001\\_ReadsPerGene.out.tab'\n",
       "\\item '1\\_MA\\_J\\_S18\\_L002\\_ReadsPerGene.out.tab'\n",
       "\\item '1\\_MA\\_J\\_S18\\_L003\\_ReadsPerGene.out.tab'\n",
       "\\item '1\\_MA\\_J\\_S18\\_L004\\_ReadsPerGene.out.tab'\n",
       "\\item '1\\_RZ\\_J\\_S26\\_L001\\_ReadsPerGene.out.tab'\n",
       "\\item '1\\_RZ\\_J\\_S26\\_L002\\_ReadsPerGene.out.tab'\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. '1_MA_J_S18_L001_ReadsPerGene.out.tab'\n",
       "2. '1_MA_J_S18_L002_ReadsPerGene.out.tab'\n",
       "3. '1_MA_J_S18_L003_ReadsPerGene.out.tab'\n",
       "4. '1_MA_J_S18_L004_ReadsPerGene.out.tab'\n",
       "5. '1_RZ_J_S26_L001_ReadsPerGene.out.tab'\n",
       "6. '1_RZ_J_S26_L002_ReadsPerGene.out.tab'\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1] \"1_MA_J_S18_L001_ReadsPerGene.out.tab\"\n",
       "[2] \"1_MA_J_S18_L002_ReadsPerGene.out.tab\"\n",
       "[3] \"1_MA_J_S18_L003_ReadsPerGene.out.tab\"\n",
       "[4] \"1_MA_J_S18_L004_ReadsPerGene.out.tab\"\n",
       "[5] \"1_RZ_J_S26_L001_ReadsPerGene.out.tab\"\n",
       "[6] \"1_RZ_J_S26_L002_ReadsPerGene.out.tab\""
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "head(stardirs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "View the head of one file (1_MA_J_S18_L001_ReadsPerGene.out.tab). Notice that there are four columns and the columns we want is the **first** and the **fourth** columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<ol>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'N_unmapped'</li>\n",
       "\t<li>'2690'</li>\n",
       "\t<li>'2690'</li>\n",
       "\t<li>'2690'</li>\n",
       "</ol>\n",
       "</li>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'N_multimapping'</li>\n",
       "\t<li>'66100'</li>\n",
       "\t<li>'66100'</li>\n",
       "\t<li>'66100'</li>\n",
       "</ol>\n",
       "</li>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'N_noFeature'</li>\n",
       "\t<li>'10626'</li>\n",
       "\t<li>'2238382'</li>\n",
       "\t<li>'20347'</li>\n",
       "</ol>\n",
       "</li>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'N_ambiguous'</li>\n",
       "\t<li>'173170'</li>\n",
       "\t<li>'1622'</li>\n",
       "\t<li>'647'</li>\n",
       "</ol>\n",
       "</li>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'CNAG_04548'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'0'</li>\n",
       "</ol>\n",
       "</li>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'CNAG_07303'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'0'</li>\n",
       "</ol>\n",
       "</li>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'CNAG_07304'</li>\n",
       "\t<li>'8'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'8'</li>\n",
       "</ol>\n",
       "</li>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'CNAG_00001'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'0'</li>\n",
       "</ol>\n",
       "</li>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'CNAG_07305'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'0'</li>\n",
       "</ol>\n",
       "</li>\n",
       "\t<li><ol class=list-inline>\n",
       "\t<li>'CNAG_00002'</li>\n",
       "\t<li>'66'</li>\n",
       "\t<li>'0'</li>\n",
       "\t<li>'66'</li>\n",
       "</ol>\n",
       "</li>\n",
       "</ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate}\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'N\\_unmapped'\n",
       "\\item '2690'\n",
       "\\item '2690'\n",
       "\\item '2690'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'N\\_multimapping'\n",
       "\\item '66100'\n",
       "\\item '66100'\n",
       "\\item '66100'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'N\\_noFeature'\n",
       "\\item '10626'\n",
       "\\item '2238382'\n",
       "\\item '20347'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'N\\_ambiguous'\n",
       "\\item '173170'\n",
       "\\item '1622'\n",
       "\\item '647'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'CNAG\\_04548'\n",
       "\\item '0'\n",
       "\\item '0'\n",
       "\\item '0'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'CNAG\\_07303'\n",
       "\\item '0'\n",
       "\\item '0'\n",
       "\\item '0'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'CNAG\\_07304'\n",
       "\\item '8'\n",
       "\\item '0'\n",
       "\\item '8'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'CNAG\\_00001'\n",
       "\\item '0'\n",
       "\\item '0'\n",
       "\\item '0'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'CNAG\\_07305'\n",
       "\\item '0'\n",
       "\\item '0'\n",
       "\\item '0'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\item \\begin{enumerate*}\n",
       "\\item 'CNAG\\_00002'\n",
       "\\item '66'\n",
       "\\item '0'\n",
       "\\item '66'\n",
       "\\end{enumerate*}\n",
       "\n",
       "\\end{enumerate}\n"
      ],
      "text/markdown": [
       "1. 1. 'N_unmapped'\n",
       "2. '2690'\n",
       "3. '2690'\n",
       "4. '2690'\n",
       "\n",
       "\n",
       "\n",
       "2. 1. 'N_multimapping'\n",
       "2. '66100'\n",
       "3. '66100'\n",
       "4. '66100'\n",
       "\n",
       "\n",
       "\n",
       "3. 1. 'N_noFeature'\n",
       "2. '10626'\n",
       "3. '2238382'\n",
       "4. '20347'\n",
       "\n",
       "\n",
       "\n",
       "4. 1. 'N_ambiguous'\n",
       "2. '173170'\n",
       "3. '1622'\n",
       "4. '647'\n",
       "\n",
       "\n",
       "\n",
       "5. 1. 'CNAG_04548'\n",
       "2. '0'\n",
       "3. '0'\n",
       "4. '0'\n",
       "\n",
       "\n",
       "\n",
       "6. 1. 'CNAG_07303'\n",
       "2. '0'\n",
       "3. '0'\n",
       "4. '0'\n",
       "\n",
       "\n",
       "\n",
       "7. 1. 'CNAG_07304'\n",
       "2. '8'\n",
       "3. '0'\n",
       "4. '8'\n",
       "\n",
       "\n",
       "\n",
       "8. 1. 'CNAG_00001'\n",
       "2. '0'\n",
       "3. '0'\n",
       "4. '0'\n",
       "\n",
       "\n",
       "\n",
       "9. 1. 'CNAG_07305'\n",
       "2. '0'\n",
       "3. '0'\n",
       "4. '0'\n",
       "\n",
       "\n",
       "\n",
       "10. 1. 'CNAG_00002'\n",
       "2. '66'\n",
       "3. '0'\n",
       "4. '66'\n",
       "\n",
       "\n",
       "\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[[1]]\n",
       "[1] \"N_unmapped\" \"2690\"       \"2690\"       \"2690\"      \n",
       "\n",
       "[[2]]\n",
       "[1] \"N_multimapping\" \"66100\"          \"66100\"          \"66100\"         \n",
       "\n",
       "[[3]]\n",
       "[1] \"N_noFeature\" \"10626\"       \"2238382\"     \"20347\"      \n",
       "\n",
       "[[4]]\n",
       "[1] \"N_ambiguous\" \"173170\"      \"1622\"        \"647\"        \n",
       "\n",
       "[[5]]\n",
       "[1] \"CNAG_04548\" \"0\"          \"0\"          \"0\"         \n",
       "\n",
       "[[6]]\n",
       "[1] \"CNAG_07303\" \"0\"          \"0\"          \"0\"         \n",
       "\n",
       "[[7]]\n",
       "[1] \"CNAG_07304\" \"8\"          \"0\"          \"8\"         \n",
       "\n",
       "[[8]]\n",
       "[1] \"CNAG_00001\" \"0\"          \"0\"          \"0\"         \n",
       "\n",
       "[[9]]\n",
       "[1] \"CNAG_07305\" \"0\"          \"0\"          \"0\"         \n",
       "\n",
       "[[10]]\n",
       "[1] \"CNAG_00002\" \"66\"         \"0\"          \"66\"        \n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "cmdstr <- paste(\"head\", file.path(DATDIR, stardirs[1]))\n",
    "cmdout <- system(cmdstr, intern = TRUE)\n",
    "str_split(cmdout, pattern = \"\\t\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Construct a matrix that gathers all the count files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "helper functions to read the count files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "mycombine <- function(df1, df2) {\n",
    "    # Combine two data frames by gene names\n",
    "    #\n",
    "    # Args:\n",
    "    #   df1 (Dataframe): the first count data\n",
    "    #   df2 (Dataframe): the second count data\n",
    "    #\n",
    "    # Returns:\n",
    "    #   (Dataframe) The combined data frame of df1 and df2\n",
    "    full_join(df1, df2, by = \"gene\")\n",
    "}\n",
    "\n",
    "myfile <- function(filedir, filename) {\n",
    "    # Get the absolute paths of a file\n",
    "    #\n",
    "    # Args:\n",
    "    #   filedir  (Character): the directory of the folder\n",
    "    #   filename (Character): the filename\n",
    "    #\n",
    "    # Returns:\n",
    "    #   (Character) the directory of the input file\n",
    "    file.path(filedir, filename)\n",
    "}\n",
    "\n",
    "# Data type for each column\n",
    "coltypes <- list(col_character(), col_integer(), col_integer(), col_integer())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "read the count files and combine them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "out <- foreach(stardir = stardirs, .combine = mycombine) %do% {\n",
    "    \n",
    "    # get a directory of each count file\n",
    "    cntfile <- myfile(DATDIR, stardir)\n",
    "    \n",
    "    # read in the count file\n",
    "    readr::read_tsv(cntfile, col_names = FALSE, col_types = coltypes) %>%\n",
    "        dplyr::select(X1, X4) %>% # get the 1st and 4th columns\n",
    "            dplyr::rename_(.dots=setNames(names(.), c(\"gene\",stardir)))\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There are 205 columns (204 count files + 1 rowname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<ol class=list-inline>\n",
       "\t<li>8501</li>\n",
       "\t<li>205</li>\n",
       "</ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 8501\n",
       "\\item 205\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 8501\n",
       "2. 205\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1] 8501  205"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "dim(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th scope=col>gene</th><th scope=col>1_MA_J_S18_L001_ReadsPerGene.out.tab</th><th scope=col>1_MA_J_S18_L002_ReadsPerGene.out.tab</th><th scope=col>1_MA_J_S18_L003_ReadsPerGene.out.tab</th><th scope=col>1_MA_J_S18_L004_ReadsPerGene.out.tab</th><th scope=col>1_RZ_J_S26_L001_ReadsPerGene.out.tab</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><td>N_unmapped    </td><td> 2690         </td><td> 2684         </td><td> 2672         </td><td> 2585         </td><td>  7218        </td></tr>\n",
       "\t<tr><td>N_multimapping</td><td>66100         </td><td>65234         </td><td>66538         </td><td>65066         </td><td>395848        </td></tr>\n",
       "\t<tr><td>N_noFeature   </td><td>20347         </td><td>20004         </td><td>20549         </td><td>20505         </td><td>768146        </td></tr>\n",
       "\t<tr><td>N_ambiguous   </td><td>  647         </td><td>  652         </td><td>  697         </td><td>  616         </td><td>  1431        </td></tr>\n",
       "\t<tr><td>CNAG_04548    </td><td>    0         </td><td>    0         </td><td>    0         </td><td>    1         </td><td>     0        </td></tr>\n",
       "\t<tr><td>CNAG_07303    </td><td>    0         </td><td>    0         </td><td>    0         </td><td>    0         </td><td>     0        </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "\\begin{tabular}{r|llllll}\n",
       " gene & 1\\_MA\\_J\\_S18\\_L001\\_ReadsPerGene.out.tab & 1\\_MA\\_J\\_S18\\_L002\\_ReadsPerGene.out.tab & 1\\_MA\\_J\\_S18\\_L003\\_ReadsPerGene.out.tab & 1\\_MA\\_J\\_S18\\_L004\\_ReadsPerGene.out.tab & 1\\_RZ\\_J\\_S26\\_L001\\_ReadsPerGene.out.tab\\\\\n",
       "\\hline\n",
       "\t N\\_unmapped     &  2690            &  2684            &  2672            &  2585            &   7218          \\\\\n",
       "\t N\\_multimapping & 66100            & 65234            & 66538            & 65066            & 395848          \\\\\n",
       "\t N\\_noFeature    & 20347            & 20004            & 20549            & 20505            & 768146          \\\\\n",
       "\t N\\_ambiguous    &   647            &   652            &   697            &   616            &   1431          \\\\\n",
       "\t CNAG\\_04548     &     0            &     0            &     0            &     1            &      0          \\\\\n",
       "\t CNAG\\_07303     &     0            &     0            &     0            &     0            &      0          \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "gene | 1_MA_J_S18_L001_ReadsPerGene.out.tab | 1_MA_J_S18_L002_ReadsPerGene.out.tab | 1_MA_J_S18_L003_ReadsPerGene.out.tab | 1_MA_J_S18_L004_ReadsPerGene.out.tab | 1_RZ_J_S26_L001_ReadsPerGene.out.tab | \n",
       "|---|---|---|---|---|---|\n",
       "| N_unmapped     |  2690          |  2684          |  2672          |  2585          |   7218         | \n",
       "| N_multimapping | 66100          | 65234          | 66538          | 65066          | 395848         | \n",
       "| N_noFeature    | 20347          | 20004          | 20549          | 20505          | 768146         | \n",
       "| N_ambiguous    |   647          |   652          |   697          |   616          |   1431         | \n",
       "| CNAG_04548     |     0          |     0          |     0          |     1          |      0         | \n",
       "| CNAG_07303     |     0          |     0          |     0          |     0          |      0         | \n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "  gene           1_MA_J_S18_L001_ReadsPerGene.out.tab\n",
       "1 N_unmapped      2690                               \n",
       "2 N_multimapping 66100                               \n",
       "3 N_noFeature    20347                               \n",
       "4 N_ambiguous      647                               \n",
       "5 CNAG_04548         0                               \n",
       "6 CNAG_07303         0                               \n",
       "  1_MA_J_S18_L002_ReadsPerGene.out.tab 1_MA_J_S18_L003_ReadsPerGene.out.tab\n",
       "1  2684                                 2672                               \n",
       "2 65234                                66538                               \n",
       "3 20004                                20549                               \n",
       "4   652                                  697                               \n",
       "5     0                                    0                               \n",
       "6     0                                    0                               \n",
       "  1_MA_J_S18_L004_ReadsPerGene.out.tab 1_RZ_J_S26_L001_ReadsPerGene.out.tab\n",
       "1  2585                                  7218                              \n",
       "2 65066                                395848                              \n",
       "3 20505                                768146                              \n",
       "4   616                                  1431                              \n",
       "5     1                                     0                              \n",
       "6     0                                     0                              "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "out[1:6, 1:6]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Arrange the results from the count files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Separate the first four rows (-> nmisc) and others (-> genecounts)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We see that our count matrix contains both summarizing counts and specific counts of each gene for each sample. Note that our 'out' matrix is in biological format (i.e. the samples are the columns and the variables are the rows). Let's split this matrix up into two matrices: `nmisc` and `genecounts`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For `nmisc`, we will take the first 4 rows of `out` since those are the summarizing features. Next, we want to transform the data frame so that it is in statistical format (the samples are the rows and the feature types are the columns). Using a combination of gather and spread, we can transpose our matrix into the desired format."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Gather and spread the first four rows\n",
    "out %>%\n",
    "    dplyr::slice(1:4) %>%\n",
    "    gather(expid, value, -gene) %>% \n",
    "    spread(gene, value) %>%\n",
    "    rename_(.dots = setNames(names(.), c(\"expid\", \"namb\", \"nmulti\", \"nnofeat\",\"nunmap\"))) ->\n",
    "    nmisc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th scope=col>expid</th><th scope=col>namb</th><th scope=col>nmulti</th><th scope=col>nnofeat</th><th scope=col>nunmap</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><td>1_MA_J_S18_L001_ReadsPerGene.out.tab</td><td> 647                                </td><td> 66100                              </td><td> 20347                              </td><td>2690                                </td></tr>\n",
       "\t<tr><td>1_MA_J_S18_L002_ReadsPerGene.out.tab</td><td> 652                                </td><td> 65234                              </td><td> 20004                              </td><td>2684                                </td></tr>\n",
       "\t<tr><td>1_MA_J_S18_L003_ReadsPerGene.out.tab</td><td> 697                                </td><td> 66538                              </td><td> 20549                              </td><td>2672                                </td></tr>\n",
       "\t<tr><td>1_MA_J_S18_L004_ReadsPerGene.out.tab</td><td> 616                                </td><td> 65066                              </td><td> 20505                              </td><td>2585                                </td></tr>\n",
       "\t<tr><td>1_RZ_J_S26_L001_ReadsPerGene.out.tab</td><td>1431                                </td><td>395848                              </td><td>768146                              </td><td>7218                                </td></tr>\n",
       "\t<tr><td>1_RZ_J_S26_L002_ReadsPerGene.out.tab</td><td>1337                                </td><td>388079                              </td><td>755654                              </td><td>7022                                </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "\\begin{tabular}{r|lllll}\n",
       " expid & namb & nmulti & nnofeat & nunmap\\\\\n",
       "\\hline\n",
       "\t 1\\_MA\\_J\\_S18\\_L001\\_ReadsPerGene.out.tab &  647                                           &  66100                                         &  20347                                         & 2690                                          \\\\\n",
       "\t 1\\_MA\\_J\\_S18\\_L002\\_ReadsPerGene.out.tab &  652                                           &  65234                                         &  20004                                         & 2684                                          \\\\\n",
       "\t 1\\_MA\\_J\\_S18\\_L003\\_ReadsPerGene.out.tab &  697                                           &  66538                                         &  20549                                         & 2672                                          \\\\\n",
       "\t 1\\_MA\\_J\\_S18\\_L004\\_ReadsPerGene.out.tab &  616                                           &  65066                                         &  20505                                         & 2585                                          \\\\\n",
       "\t 1\\_RZ\\_J\\_S26\\_L001\\_ReadsPerGene.out.tab & 1431                                           & 395848                                         & 768146                                         & 7218                                          \\\\\n",
       "\t 1\\_RZ\\_J\\_S26\\_L002\\_ReadsPerGene.out.tab & 1337                                           & 388079                                         & 755654                                         & 7022                                          \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "expid | namb | nmulti | nnofeat | nunmap | \n",
       "|---|---|---|---|---|---|\n",
       "| 1_MA_J_S18_L001_ReadsPerGene.out.tab |  647                                 |  66100                               |  20347                               | 2690                                 | \n",
       "| 1_MA_J_S18_L002_ReadsPerGene.out.tab |  652                                 |  65234                               |  20004                               | 2684                                 | \n",
       "| 1_MA_J_S18_L003_ReadsPerGene.out.tab |  697                                 |  66538                               |  20549                               | 2672                                 | \n",
       "| 1_MA_J_S18_L004_ReadsPerGene.out.tab |  616                                 |  65066                               |  20505                               | 2585                                 | \n",
       "| 1_RZ_J_S26_L001_ReadsPerGene.out.tab | 1431                                 | 395848                               | 768146                               | 7218                                 | \n",
       "| 1_RZ_J_S26_L002_ReadsPerGene.out.tab | 1337                                 | 388079                               | 755654                               | 7022                                 | \n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "  expid                                namb nmulti nnofeat nunmap\n",
       "1 1_MA_J_S18_L001_ReadsPerGene.out.tab  647  66100  20347  2690  \n",
       "2 1_MA_J_S18_L002_ReadsPerGene.out.tab  652  65234  20004  2684  \n",
       "3 1_MA_J_S18_L003_ReadsPerGene.out.tab  697  66538  20549  2672  \n",
       "4 1_MA_J_S18_L004_ReadsPerGene.out.tab  616  65066  20505  2585  \n",
       "5 1_RZ_J_S26_L001_ReadsPerGene.out.tab 1431 395848 768146  7218  \n",
       "6 1_RZ_J_S26_L002_ReadsPerGene.out.tab 1337 388079 755654  7022  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nmisc %>% head"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To obtain the counts for specific genes, we will use the rest of our `out` matrix since it contains the gene counts. However, we still want to transform the data frame into statistical format, which we will accomplish using gather and spread."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Gather and spread the genes to get a count matrix\n",
    "out %>%\n",
    "    dplyr::slice(-(1:4)) %>%\n",
    "    gather(expid, value, -gene) %>% \n",
    "    spread(gene, value) -> genecounts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th scope=col>expid</th><th scope=col>CNAG_00001</th><th scope=col>CNAG_00002</th><th scope=col>CNAG_00003</th><th scope=col>CNAG_00004</th><th scope=col>CNAG_00005</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><td>1_MA_J_S18_L001_ReadsPerGene.out.tab</td><td>0                                   </td><td>66                                  </td><td>38                                  </td><td>74                                  </td><td>33                                  </td></tr>\n",
       "\t<tr><td>1_MA_J_S18_L002_ReadsPerGene.out.tab</td><td>0                                   </td><td>59                                  </td><td>25                                  </td><td>79                                  </td><td>25                                  </td></tr>\n",
       "\t<tr><td>1_MA_J_S18_L003_ReadsPerGene.out.tab</td><td>0                                   </td><td>74                                  </td><td>27                                  </td><td>79                                  </td><td>32                                  </td></tr>\n",
       "\t<tr><td>1_MA_J_S18_L004_ReadsPerGene.out.tab</td><td>0                                   </td><td>66                                  </td><td>22                                  </td><td>69                                  </td><td>24                                  </td></tr>\n",
       "\t<tr><td>1_RZ_J_S26_L001_ReadsPerGene.out.tab</td><td>0                                   </td><td>50                                  </td><td>16                                  </td><td>51                                  </td><td>26                                  </td></tr>\n",
       "\t<tr><td>1_RZ_J_S26_L002_ReadsPerGene.out.tab</td><td>0                                   </td><td>45                                  </td><td> 7                                  </td><td>51                                  </td><td>31                                  </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "\\begin{tabular}{r|llllll}\n",
       " expid & CNAG\\_00001 & CNAG\\_00002 & CNAG\\_00003 & CNAG\\_00004 & CNAG\\_00005\\\\\n",
       "\\hline\n",
       "\t 1\\_MA\\_J\\_S18\\_L001\\_ReadsPerGene.out.tab & 0                                              & 66                                             & 38                                             & 74                                             & 33                                            \\\\\n",
       "\t 1\\_MA\\_J\\_S18\\_L002\\_ReadsPerGene.out.tab & 0                                              & 59                                             & 25                                             & 79                                             & 25                                            \\\\\n",
       "\t 1\\_MA\\_J\\_S18\\_L003\\_ReadsPerGene.out.tab & 0                                              & 74                                             & 27                                             & 79                                             & 32                                            \\\\\n",
       "\t 1\\_MA\\_J\\_S18\\_L004\\_ReadsPerGene.out.tab & 0                                              & 66                                             & 22                                             & 69                                             & 24                                            \\\\\n",
       "\t 1\\_RZ\\_J\\_S26\\_L001\\_ReadsPerGene.out.tab & 0                                              & 50                                             & 16                                             & 51                                             & 26                                            \\\\\n",
       "\t 1\\_RZ\\_J\\_S26\\_L002\\_ReadsPerGene.out.tab & 0                                              & 45                                             &  7                                             & 51                                             & 31                                            \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "expid | CNAG_00001 | CNAG_00002 | CNAG_00003 | CNAG_00004 | CNAG_00005 | \n",
       "|---|---|---|---|---|---|\n",
       "| 1_MA_J_S18_L001_ReadsPerGene.out.tab | 0                                    | 66                                   | 38                                   | 74                                   | 33                                   | \n",
       "| 1_MA_J_S18_L002_ReadsPerGene.out.tab | 0                                    | 59                                   | 25                                   | 79                                   | 25                                   | \n",
       "| 1_MA_J_S18_L003_ReadsPerGene.out.tab | 0                                    | 74                                   | 27                                   | 79                                   | 32                                   | \n",
       "| 1_MA_J_S18_L004_ReadsPerGene.out.tab | 0                                    | 66                                   | 22                                   | 69                                   | 24                                   | \n",
       "| 1_RZ_J_S26_L001_ReadsPerGene.out.tab | 0                                    | 50                                   | 16                                   | 51                                   | 26                                   | \n",
       "| 1_RZ_J_S26_L002_ReadsPerGene.out.tab | 0                                    | 45                                   |  7                                   | 51                                   | 31                                   | \n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "  expid                                CNAG_00001 CNAG_00002 CNAG_00003\n",
       "1 1_MA_J_S18_L001_ReadsPerGene.out.tab 0          66         38        \n",
       "2 1_MA_J_S18_L002_ReadsPerGene.out.tab 0          59         25        \n",
       "3 1_MA_J_S18_L003_ReadsPerGene.out.tab 0          74         27        \n",
       "4 1_MA_J_S18_L004_ReadsPerGene.out.tab 0          66         22        \n",
       "5 1_RZ_J_S26_L001_ReadsPerGene.out.tab 0          50         16        \n",
       "6 1_RZ_J_S26_L002_ReadsPerGene.out.tab 0          45          7        \n",
       "  CNAG_00004 CNAG_00005\n",
       "1 74         33        \n",
       "2 79         25        \n",
       "3 79         32        \n",
       "4 69         24        \n",
       "5 51         26        \n",
       "6 51         31        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "genecounts[1:6,1:6]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### For each samples, sum up all the counts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can create a variable denoting the number of total genes mapped for each sample by summing across the rows."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Sum across the rows for a total gene count variable\n",
    "genecounts %>%    \n",
    "    mutate(ngenemap = rowSums(.[-1])) %>%\n",
    "    select(expid, ngenemap) -> ngene"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Summarize the results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will create a comprehensive data frame `mapresults` which will combine `ngene` with `nmisc`. This data frame will have summarizing mapping features in addition to proportion features. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Merge in the 4 misc counts and add summaries\n",
    "ngene %>%\n",
    "    full_join(nmisc, by = \"expid\") %>%\n",
    "    mutate(depth = as.integer(ngenemap + namb + nmulti + nnofeat + nunmap)) %>%\n",
    "    mutate(prop.gene = ngenemap / depth) %>%\n",
    "    mutate(prop.nofeat = nnofeat / depth) %>%\n",
    "    mutate(prop.unique = (ngenemap + nnofeat) / depth) ->\n",
    "    mapresults"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Store the results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th scope=col>expid</th><th scope=col>ngenemap</th><th scope=col>namb</th><th scope=col>nmulti</th><th scope=col>nnofeat</th><th scope=col>nunmap</th><th scope=col>depth</th><th scope=col>prop.gene</th><th scope=col>prop.nofeat</th><th scope=col>prop.unique</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><td>1_MA_J_S18_L001_ReadsPerGene.out.tab</td><td>2399781                             </td><td> 647                                </td><td> 66100                              </td><td> 20347                              </td><td>2690                                </td><td>2489565                             </td><td>0.9639359                           </td><td>0.008172914                         </td><td>0.9721088                           </td></tr>\n",
       "\t<tr><td>1_MA_J_S18_L002_ReadsPerGene.out.tab</td><td>2362228                             </td><td> 652                                </td><td> 65234                              </td><td> 20004                              </td><td>2684                                </td><td>2450802                             </td><td>0.9638592                           </td><td>0.008162226                         </td><td>0.9720214                           </td></tr>\n",
       "\t<tr><td>1_MA_J_S18_L003_ReadsPerGene.out.tab</td><td>2436776                             </td><td> 697                                </td><td> 66538                              </td><td> 20549                              </td><td>2672                                </td><td>2527232                             </td><td>0.9642075                           </td><td>0.008131030                         </td><td>0.9723385                           </td></tr>\n",
       "\t<tr><td>1_MA_J_S18_L004_ReadsPerGene.out.tab</td><td>2417485                             </td><td> 616                                </td><td> 65066                              </td><td> 20505                              </td><td>2585                                </td><td>2506257                             </td><td>0.9645798                           </td><td>0.008181523                         </td><td>0.9727614                           </td></tr>\n",
       "\t<tr><td>1_RZ_J_S26_L001_ReadsPerGene.out.tab</td><td>2366742                             </td><td>1431                                </td><td>395848                              </td><td>768146                              </td><td>7218                                </td><td>3539385                             </td><td>0.6686874                           </td><td>0.217028100                         </td><td>0.8857155                           </td></tr>\n",
       "\t<tr><td>1_RZ_J_S26_L002_ReadsPerGene.out.tab</td><td>2331658                             </td><td>1337                                </td><td>388079                              </td><td>755654                              </td><td>7022                                </td><td>3483750                             </td><td>0.6692954                           </td><td>0.216908217                         </td><td>0.8862037                           </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "\\begin{tabular}{r|llllllllll}\n",
       " expid & ngenemap & namb & nmulti & nnofeat & nunmap & depth & prop.gene & prop.nofeat & prop.unique\\\\\n",
       "\\hline\n",
       "\t 1\\_MA\\_J\\_S18\\_L001\\_ReadsPerGene.out.tab & 2399781                                        &  647                                           &  66100                                         &  20347                                         & 2690                                           & 2489565                                        & 0.9639359                                      & 0.008172914                                    & 0.9721088                                     \\\\\n",
       "\t 1\\_MA\\_J\\_S18\\_L002\\_ReadsPerGene.out.tab & 2362228                                        &  652                                           &  65234                                         &  20004                                         & 2684                                           & 2450802                                        & 0.9638592                                      & 0.008162226                                    & 0.9720214                                     \\\\\n",
       "\t 1\\_MA\\_J\\_S18\\_L003\\_ReadsPerGene.out.tab & 2436776                                        &  697                                           &  66538                                         &  20549                                         & 2672                                           & 2527232                                        & 0.9642075                                      & 0.008131030                                    & 0.9723385                                     \\\\\n",
       "\t 1\\_MA\\_J\\_S18\\_L004\\_ReadsPerGene.out.tab & 2417485                                        &  616                                           &  65066                                         &  20505                                         & 2585                                           & 2506257                                        & 0.9645798                                      & 0.008181523                                    & 0.9727614                                     \\\\\n",
       "\t 1\\_RZ\\_J\\_S26\\_L001\\_ReadsPerGene.out.tab & 2366742                                        & 1431                                           & 395848                                         & 768146                                         & 7218                                           & 3539385                                        & 0.6686874                                      & 0.217028100                                    & 0.8857155                                     \\\\\n",
       "\t 1\\_RZ\\_J\\_S26\\_L002\\_ReadsPerGene.out.tab & 2331658                                        & 1337                                           & 388079                                         & 755654                                         & 7022                                           & 3483750                                        & 0.6692954                                      & 0.216908217                                    & 0.8862037                                     \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "expid | ngenemap | namb | nmulti | nnofeat | nunmap | depth | prop.gene | prop.nofeat | prop.unique | \n",
       "|---|---|---|---|---|---|\n",
       "| 1_MA_J_S18_L001_ReadsPerGene.out.tab | 2399781                              |  647                                 |  66100                               |  20347                               | 2690                                 | 2489565                              | 0.9639359                            | 0.008172914                          | 0.9721088                            | \n",
       "| 1_MA_J_S18_L002_ReadsPerGene.out.tab | 2362228                              |  652                                 |  65234                               |  20004                               | 2684                                 | 2450802                              | 0.9638592                            | 0.008162226                          | 0.9720214                            | \n",
       "| 1_MA_J_S18_L003_ReadsPerGene.out.tab | 2436776                              |  697                                 |  66538                               |  20549                               | 2672                                 | 2527232                              | 0.9642075                            | 0.008131030                          | 0.9723385                            | \n",
       "| 1_MA_J_S18_L004_ReadsPerGene.out.tab | 2417485                              |  616                                 |  65066                               |  20505                               | 2585                                 | 2506257                              | 0.9645798                            | 0.008181523                          | 0.9727614                            | \n",
       "| 1_RZ_J_S26_L001_ReadsPerGene.out.tab | 2366742                              | 1431                                 | 395848                               | 768146                               | 7218                                 | 3539385                              | 0.6686874                            | 0.217028100                          | 0.8857155                            | \n",
       "| 1_RZ_J_S26_L002_ReadsPerGene.out.tab | 2331658                              | 1337                                 | 388079                               | 755654                               | 7022                                 | 3483750                              | 0.6692954                            | 0.216908217                          | 0.8862037                            | \n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "  expid                                ngenemap namb nmulti nnofeat nunmap\n",
       "1 1_MA_J_S18_L001_ReadsPerGene.out.tab 2399781   647  66100  20347  2690  \n",
       "2 1_MA_J_S18_L002_ReadsPerGene.out.tab 2362228   652  65234  20004  2684  \n",
       "3 1_MA_J_S18_L003_ReadsPerGene.out.tab 2436776   697  66538  20549  2672  \n",
       "4 1_MA_J_S18_L004_ReadsPerGene.out.tab 2417485   616  65066  20505  2585  \n",
       "5 1_RZ_J_S26_L001_ReadsPerGene.out.tab 2366742  1431 395848 768146  7218  \n",
       "6 1_RZ_J_S26_L002_ReadsPerGene.out.tab 2331658  1337 388079 755654  7022  \n",
       "  depth   prop.gene prop.nofeat prop.unique\n",
       "1 2489565 0.9639359 0.008172914 0.9721088  \n",
       "2 2450802 0.9638592 0.008162226 0.9720214  \n",
       "3 2527232 0.9642075 0.008131030 0.9723385  \n",
       "4 2506257 0.9645798 0.008181523 0.9727614  \n",
       "5 3539385 0.6686874 0.217028100 0.8857155  \n",
       "6 3483750 0.6692954 0.216908217 0.8862037  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "head(mapresults)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "outfile <- file.path(OUTDIR, \"hts-pilot-2018.RData\")\n",
    "save(mapresults, genecounts, file = outfile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<strong>/home/jovyan/work/scratch/analysis_output/out/hts-pilot-2018.RData:</strong> 'e8d7991f674b7a9284abae8f38656c2e'"
      ],
      "text/latex": [
       "\\textbf{/home/jovyan/work/scratch/analysis\\textbackslash{}\\_output/out/hts-pilot-2018.RData:} 'e8d7991f674b7a9284abae8f38656c2e'"
      ],
      "text/markdown": [
       "**/home/jovyan/work/scratch/analysis_output/out/hts-pilot-2018.RData:** 'e8d7991f674b7a9284abae8f38656c2e'"
      ],
      "text/plain": [
       "/home/jovyan/work/scratch/analysis_output/out/hts-pilot-2018.RData \n",
       "                                \"e8d7991f674b7a9284abae8f38656c2e\" "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "tools::md5sum(path.expand(outfile))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The End"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "R version 3.4.1 (2017-06-30)\n",
       "Platform: x86_64-pc-linux-gnu (64-bit)\n",
       "Running under: Debian GNU/Linux 8 (jessie)\n",
       "\n",
       "Matrix products: default\n",
       "BLAS: /opt/conda/lib/R/lib/libRblas.so\n",
       "LAPACK: /opt/conda/lib/R/lib/libRlapack.so\n",
       "\n",
       "locale:\n",
       " [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              \n",
       " [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    \n",
       " [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   \n",
       " [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 \n",
       " [9] LC_ADDRESS=C               LC_TELEPHONE=C            \n",
       "[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       \n",
       "\n",
       "attached base packages:\n",
       " [1] tools     parallel  stats4    stats     graphics  grDevices utils    \n",
       " [8] datasets  methods   base     \n",
       "\n",
       "other attached packages:\n",
       " [1] bindrcpp_0.2               plotly_4.8.0              \n",
       " [3] dendextend_1.8.0           gridExtra_2.3             \n",
       " [5] RColorBrewer_1.1-2         qvalue_2.10.0             \n",
       " [7] limma_3.34.9               DESeq2_1.18.1             \n",
       " [9] SummarizedExperiment_1.8.0 DelayedArray_0.4.1        \n",
       "[11] matrixStats_0.53.1         Biobase_2.38.0            \n",
       "[13] GenomicRanges_1.30.3       GenomeInfoDb_1.14.0       \n",
       "[15] IRanges_2.12.0             S4Vectors_0.16.0          \n",
       "[17] BiocGenerics_0.24.0        haven_1.1.1               \n",
       "[19] stringr_1.3.0              foreach_1.4.4             \n",
       "[21] dplyr_0.7.4                purrr_0.2.4               \n",
       "[23] readr_1.1.1                tidyr_0.8.1               \n",
       "[25] tibble_1.4.2               ggplot2_3.0.0             \n",
       "[27] tidyverse_1.1.1           \n",
       "\n",
       "loaded via a namespace (and not attached):\n",
       "  [1] colorspace_1.3-2        class_7.3-14            modeltools_0.2-22      \n",
       "  [4] mclust_5.4.1            IRdisplay_0.4.4         htmlTable_1.9          \n",
       "  [7] XVector_0.18.0          base64enc_0.1-3         flexmix_2.3-14         \n",
       " [10] bit64_0.9-5             mvtnorm_1.0-8           AnnotationDbi_1.40.0   \n",
       " [13] lubridate_1.7.4         xml2_1.2.0              codetools_0.2-15       \n",
       " [16] splines_3.4.1           mnormt_1.5-5            robustbase_0.92-7      \n",
       " [19] geneplotter_1.56.0      knitr_1.20              IRkernel_0.8.11        \n",
       " [22] Formula_1.2-1           jsonlite_1.5            broom_0.4.4            \n",
       " [25] annotate_1.56.0         kernlab_0.9-25          cluster_2.0.6          \n",
       " [28] compiler_3.4.1          httr_1.3.1              backports_1.1.2        \n",
       " [31] assertthat_0.2.0        Matrix_1.2-14           lazyeval_0.2.1         \n",
       " [34] acepack_1.4.1           htmltools_0.3.6         gtable_0.2.0           \n",
       " [37] glue_1.2.0              GenomeInfoDbData_0.99.0 reshape2_1.4.3         \n",
       " [40] Rcpp_0.12.15            trimcluster_0.1-2.1     cellranger_1.1.0       \n",
       " [43] nlme_3.1-131            fpc_2.1-11.1            iterators_1.0.9        \n",
       " [46] psych_1.8.4             rvest_0.3.2             XML_3.98-1.11          \n",
       " [49] DEoptimR_1.0-8          MASS_7.3-48             zlibbioc_1.24.0        \n",
       " [52] scales_0.5.0            hms_0.3                 memoise_1.1.0          \n",
       " [55] rpart_4.1-13            latticeExtra_0.6-28     stringi_1.1.7          \n",
       " [58] RSQLite_2.0             genefilter_1.60.0       checkmate_1.8.5        \n",
       " [61] BiocParallel_1.12.0     repr_0.15.0             prabclus_2.2-6         \n",
       " [64] rlang_0.2.1             pkgconfig_2.0.1         bitops_1.0-6           \n",
       " [67] evaluate_0.10.1         lattice_0.20-34         bindr_0.1.1            \n",
       " [70] htmlwidgets_1.2         tidyselect_0.2.4        bit_1.1-12             \n",
       " [73] plyr_1.8.4              magrittr_1.5            R6_2.2.2               \n",
       " [76] Hmisc_4.0-3             pbdZMQ_0.3-2            DBI_1.0.0              \n",
       " [79] pillar_1.2.2            whisker_0.3-2           foreign_0.8-67         \n",
       " [82] withr_2.1.1             survival_2.40-1         RCurl_1.95-4.8         \n",
       " [85] nnet_7.3-12             modelr_0.1.2            crayon_1.3.4           \n",
       " [88] uuid_0.1-2              viridis_0.5.1           locfit_1.5-9.1         \n",
       " [91] grid_3.4.1              readxl_1.1.0            data.table_1.10.4      \n",
       " [94] blob_1.1.1              forcats_0.3.0           diptest_0.75-7         \n",
       " [97] digest_0.6.15           xtable_1.8-2            munsell_0.5.0          \n",
       "[100] viridisLite_0.3.0      "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sessionInfo()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.4.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}