In [1]:
source bioinf_intro_config.sh
rm -rf $IGV_DIR $STAR_OUT
mkdir -p $TRIMMED $STAR_OUT $IGV_DIR

Run with shorter intron limit

IGV needs indices for the BAM files. The index allows it to quickly load reads from different parts of the genome.

In [2]:
for FASTQ in $RAW_FASTQS/27_MA_P_S38_L00[1]_R1_001.fastq.gz
    do
        FASTQ_BASE="$(basename ${FASTQ} '_001.fastq.gz')"
        echo "---------------- TRIMMING: $FASTQ_BASE ----------------"
        fastq-mcf \
            $MYINFO/neb_e7600_adapters.fasta \
            $RAW_FASTQS/${FASTQ_BASE}_001.fastq.gz \
            -q 20 -x 0.5 \
            -o $TRIMMED/${FASTQ_BASE}_001.trim.fastq.gz

        echo "---------------- MAPPING: $FASTQ_BASE ----------------"
        STAR \
            --runMode alignReads \
            --twopassMode None \
            --genomeDir $GENOME_DIR \
            --readFilesIn $TRIMMED/${FASTQ_BASE}_001.trim.fastq.gz \
            --readFilesCommand gunzip -c \
            --outFileNamePrefix ${STAR_OUT}/${FASTQ_BASE}_short_introns_ \
            --quantMode GeneCounts \
            --outSAMtype BAM SortedByCoordinate \
            --alignIntronMax 5000 \
            --outSJfilterIntronMaxVsReadN 500 1000 2000 \

        echo "---------------- INDEXING BAM: $FASTQ_BASE ----------------"
        samtools index ${STAR_OUT}/${FASTQ_BASE}_short_introns_Aligned.sortedByCoord.out.bam
    done
---------------- TRIMMING: 27_MA_P_S38_L00[1]_R1 ----------------
Command Line: /Users/cliburn/work/scratch/bioinf_intro/myinfo/neb_e7600_adapters.fasta /data/hts2018_pilot/Granek_4837_180427A5/27_MA_P_S38_L00[1]_R1_001.fastq.gz -q 20 -x 0.5 -o /Users/cliburn/work/scratch/bioinf_intro/trimmed_fastqs/27_MA_P_S38_L00[1]_R1_001.trim.fastq.gz
Scale used: 2.2
gunzip: can't stat: /data/hts2018_pilot/Granek_4837_180427A5/27_MA_P_S38_L00[1]_R1_001.fastq.gz (/data/hts2018_pilot/Granek_4837_180427A5/27_MA_P_S38_L00[1]_R1_001.fastq.gz.gz): No such file or directory
Phred: 64
No records in file /data/hts2018_pilot/Granek_4837_180427A5/27_MA_P_S38_L00[1]_R1_001.fastq.gz
---------------- MAPPING: 27_MA_P_S38_L00[1]_R1 ----------------
STAR: Bad Option: --runMode.
Usage:  STAR cmd [options] [-find] file1 ... filen [find expression]

Use     STAR -help
and     STAR -xhelp
to get a list of valid cmds and options.

Use     STAR H=help
to get a list of valid archive header formats.

Use     STAR diffopts=help
to get a list of valid diff options.
---------------- INDEXING BAM: 27_MA_P_S38_L00[1]_R1 ----------------
[E::hts_open_format] fail to open file '/Users/cliburn/work/scratch/bioinf_intro/star_out/27_MA_P_S38_L00[1]_R1_short_introns_Aligned.sortedByCoord.out.bam'
samtools index: failed to open "/Users/cliburn/work/scratch/bioinf_intro/star_out/27_MA_P_S38_L00[1]_R1_short_introns_Aligned.sortedByCoord.out.bam": No such file or directory

In [3]:
ls -ltr ${STAR_OUT}

Tarring

In [5]:
tar --dereference \
    --create \
    --gzip \
    --verbose \
    --file $CUROUT/stuff_for_igv.tgz \
    --directory $CUROUT \
    $(basename $IGV_DIR)
a igv
a igv/Cryptococcus_neoformans_var_grubii_h99.CNA3.dna.toplevel.fa
a igv/Cryptococcus_neoformans_var_grubii_h99.CNA3.39.gtf
a igv/*.bam*