In [1]:
source bioinf_intro_config.sh
rm -rf $IGV_DIR $STAR_OUT
mkdir -p $TRIMMED $STAR_OUT $IGV_DIR
Run with shorter intron limit¶
IGV needs indices for the BAM files. The index allows it to quickly load reads from different parts of the genome.
In [2]:
for FASTQ in $RAW_FASTQS/27_MA_P_S38_L00[1]_R1_001.fastq.gz
do
FASTQ_BASE="$(basename ${FASTQ} '_001.fastq.gz')"
echo "---------------- TRIMMING: $FASTQ_BASE ----------------"
fastq-mcf \
$MYINFO/neb_e7600_adapters.fasta \
$RAW_FASTQS/${FASTQ_BASE}_001.fastq.gz \
-q 20 -x 0.5 \
-o $TRIMMED/${FASTQ_BASE}_001.trim.fastq.gz
echo "---------------- MAPPING: $FASTQ_BASE ----------------"
STAR \
--runMode alignReads \
--twopassMode None \
--genomeDir $GENOME_DIR \
--readFilesIn $TRIMMED/${FASTQ_BASE}_001.trim.fastq.gz \
--readFilesCommand gunzip -c \
--outFileNamePrefix ${STAR_OUT}/${FASTQ_BASE}_short_introns_ \
--quantMode GeneCounts \
--outSAMtype BAM SortedByCoordinate \
--alignIntronMax 5000 \
--outSJfilterIntronMaxVsReadN 500 1000 2000 \
echo "---------------- INDEXING BAM: $FASTQ_BASE ----------------"
samtools index ${STAR_OUT}/${FASTQ_BASE}_short_introns_Aligned.sortedByCoord.out.bam
done
---------------- TRIMMING: 27_MA_P_S38_L00[1]_R1 ----------------
Command Line: /Users/cliburn/work/scratch/bioinf_intro/myinfo/neb_e7600_adapters.fasta /data/hts2018_pilot/Granek_4837_180427A5/27_MA_P_S38_L00[1]_R1_001.fastq.gz -q 20 -x 0.5 -o /Users/cliburn/work/scratch/bioinf_intro/trimmed_fastqs/27_MA_P_S38_L00[1]_R1_001.trim.fastq.gz
Scale used: 2.2
gunzip: can't stat: /data/hts2018_pilot/Granek_4837_180427A5/27_MA_P_S38_L00[1]_R1_001.fastq.gz (/data/hts2018_pilot/Granek_4837_180427A5/27_MA_P_S38_L00[1]_R1_001.fastq.gz.gz): No such file or directory
Phred: 64
No records in file /data/hts2018_pilot/Granek_4837_180427A5/27_MA_P_S38_L00[1]_R1_001.fastq.gz
---------------- MAPPING: 27_MA_P_S38_L00[1]_R1 ----------------
STAR: Bad Option: --runMode.
Usage: STAR cmd [options] [-find] file1 ... filen [find expression]
Use STAR -help
and STAR -xhelp
to get a list of valid cmds and options.
Use STAR H=help
to get a list of valid archive header formats.
Use STAR diffopts=help
to get a list of valid diff options.
---------------- INDEXING BAM: 27_MA_P_S38_L00[1]_R1 ----------------
[E::hts_open_format] fail to open file '/Users/cliburn/work/scratch/bioinf_intro/star_out/27_MA_P_S38_L00[1]_R1_short_introns_Aligned.sortedByCoord.out.bam'
samtools index: failed to open "/Users/cliburn/work/scratch/bioinf_intro/star_out/27_MA_P_S38_L00[1]_R1_short_introns_Aligned.sortedByCoord.out.bam": No such file or directory
In [3]:
ls -ltr ${STAR_OUT}
Link Directory¶
In [4]:
ln -s ${STAR_OUT}/*.bam* $GTF $FASTA $IGV_DIR
Tarring¶
In [5]:
tar --dereference \
--create \
--gzip \
--verbose \
--file $CUROUT/stuff_for_igv.tgz \
--directory $CUROUT \
$(basename $IGV_DIR)
a igv
a igv/Cryptococcus_neoformans_var_grubii_h99.CNA3.dna.toplevel.fa
a igv/Cryptococcus_neoformans_var_grubii_h99.CNA3.39.gtf
a igv/*.bam*