{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Initialize Variables and Make Directories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# bioinf_intro_config.sh is expected to define TRIMMED, MYINFO, GENOME_DIR and STAR_OUT.\n",
    "source bioinf_intro_config.sh\n",
    "\n",
    "# Quote every path so directories containing spaces are created as single arguments.\n",
    "mkdir -p \"$TRIMMED\" \"$MYINFO\" \"$GENOME_DIR\" \"$STAR_OUT\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Make adapter file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the adapter sequences and their reverse complements (*_rc) as FASTA records.\n",
    "# Single-quoted arguments keep the text literal; printf emits one record line per argument.\n",
    "printf '%s\\n' \\\n",
    "    '>Adapter' \\\n",
    "    'AGATCGGAAGAGCACACGTCTGAACTCCAGTCA' \\\n",
    "    '>AdapterRead2' \\\n",
    "    'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT' \\\n",
    "    '>Adapter_rc' \\\n",
    "    'TGACTGGAGTTCAGACGTGTGCTCTTCCGATCT' \\\n",
    "    '>AdapterRead2_rc' \\\n",
    "    'ACACTCTTTCCCTACACGACGCTCTTCCGATCT' \\\n",
    "    > \"$MYINFO/neb_e7600_adapters.fasta\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download and Index Genome"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remote location of the reference files and the local paths of the unpacked copies.\n",
    "SHARED_URL=\"ftp://ftp.ensemblgenomes.org/pub/release-39/fungi\"\n",
    "COLLECTION_PATH=\"fungi_basidiomycota1_collection/cryptococcus_neoformans_var_grubii_h99\"\n",
    "FASTA=\"${GENOME_DIR}/Cryptococcus_neoformans_var_grubii_h99.CNA3.dna.toplevel.fa\"\n",
    "GTF=\"${GENOME_DIR}/Cryptococcus_neoformans_var_grubii_h99.CNA3.39.gtf\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Download to explicit file names: without --output-document, wget saves a re-download\n",
    "# as \"<name>.1\" when \"<name>\" already exists, and gunzip would then unpack the older copy.\n",
    "# Each archive is unpacked only if its own download succeeded; gunzip -f overwrites\n",
    "# a previously unpacked file instead of prompting.\n",
    "wget --no-verbose --output-document \"${GTF}.gz\" \\\n",
    "    \"${SHARED_URL}/gtf/${COLLECTION_PATH}/$(basename \"$GTF\").gz\" \\\n",
    "  && gunzip -f \"${GTF}.gz\"\n",
    "\n",
    "wget --no-verbose --output-document \"${FASTA}.gz\" \\\n",
    "    \"${SHARED_URL}/fasta/${COLLECTION_PATH}/dna/$(basename \"$FASTA\").gz\" \\\n",
    "  && gunzip -f \"${FASTA}.gz\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the STAR index in GENOME_DIR from the downloaded FASTA and GTF annotation.\n",
    "# --genomeSAindexNbases is lowered from STAR's default because the STAR manual\n",
    "# recommends min(14, log2(GenomeLength)/2 - 1) for small genomes.\n",
    "STAR \\\n",
    "    --runMode genomeGenerate \\\n",
    "    --genomeDir \"$GENOME_DIR\" \\\n",
    "    --genomeFastaFiles \"$FASTA\" \\\n",
    "    --sjdbGTFfile \"$GTF\" \\\n",
    "    --outFileNamePrefix \"${STAR_OUT}/genome_\" \\\n",
    "    --genomeSAindexNbases 11"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Bash",
   "language": "bash",
   "name": "bash"
  },
  "language_info": {
   "codemirror_mode": "shell",
   "file_extension": ".sh",
   "mimetype": "text/x-sh",
   "name": "bash"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}