{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Set environment" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "cleanup () { \n", " :\n", "}\n", "\n", "trap \"cleanup\" SIGPIPE" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "set -u" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "set directory" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "CURDIR=/home/jovyan/work/HTS2018\n", "INFODIR=${CURDIR}/Info\n", "PATHFILE=/home/jovyan/work/HTS-R25-DEV-2018/Info/PathwaysByGeneIds_Summary.txt" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "mkdir -p $INFODIR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Read data" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pathway Id\tMap - Painted With Transformed Genes (new window)\tPathway\tUnique Gene Count\tGenes\n", "ec00010\t\"ec00010 (decorated)\"\"\"\tGlycolysis / Gluconeogenesis\t34\tCNAG_00038 | CNAG_00057 | CNAG_00515 | CNAG_00735 | CNAG_00797 | CNAG_01078 | CNAG_01120 | CNAG_01675 | CNAG_01820 | CNAG_01955 | CNAG_02035 | CNAG_02377 | CNAG_02489 | CNAG_02736 | CNAG_02903 | CNAG_03072 | CNAG_03358 | CNAG_03916 | CNAG_04217 | CNAG_04523 | CNAG_04659 | CNAG_04676 | CNAG_05059 | CNAG_05113 | CNAG_06035 | CNAG_06313 | CNAG_06628 | CNAG_06699 | CNAG_06770 | CNAG_07004 | CNAG_07316 | CNAG_07559 | CNAG_07660 | CNAG_07745\t\n", "ec00020\t\" $INFODIR/pathway_names.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### gene list" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "cat $PATHFILE | tail -n +2 | cut -f 4 | grep '^CNAG' > $INFODIR/pathway_genes.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### check if the files are 
created\n", "Make sure neither file has a size of zero." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-rw-r--r-- 1 jovyan users 425126 Jul 17 16:48 /home/jovyan/work/HTS2018/Info/pathway_genes.txt\n", "-rw-r--r-- 1 jovyan users 18144 Jul 17 16:48 /home/jovyan/work/HTS2018/Info/pathway_names.txt\n" ] } ], "source": [ "# The glob stays outside the quotes so it still expands.\n", "ls -l \"$INFODIR\"/pathway_*" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ec00010\n", "ec00020\n", "ec00030\n" ] } ], "source": [ "head -n 3 \"$INFODIR/pathway_names.txt\"" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CNAG_00038 | CNAG_00057 | CNAG_00515 | CNAG_00735 | CNAG_00797 | CNAG_01078 | CNAG_01120 | CNAG_01675 | CNAG_01820 | CNAG_01955 | CNAG_02035 | CNAG_02377 | CNAG_02489 | CNAG_02736 | CNAG_02903 | CNAG_03072 | CNAG_03358 | CNAG_03916 | CNAG_04217 | CNAG_04523 | CNAG_04659 | CNAG_04676 | CNAG_05059 | CNAG_05113 | CNAG_06035 | CNAG_06313 | CNAG_06628 | CNAG_06699 | CNAG_06770 | CNAG_07004 | CNAG_07316 | CNAG_07559 | CNAG_07660 | CNAG_07745\n", "CNAG_00061 | CNAG_00747 | CNAG_01120 | CNAG_01264 | CNAG_01657 | CNAG_01680 | CNAG_02736 | CNAG_03225 | CNAG_03226 | CNAG_03266 | CNAG_03375 | CNAG_03596 | CNAG_03674 | CNAG_03920 | CNAG_04189 | CNAG_04217 | CNAG_04468 | CNAG_04535 | CNAG_04640 | CNAG_05059 | CNAG_05236 | CNAG_05907 | CNAG_07004 | CNAG_07356 | CNAG_07363 | CNAG_07660 | CNAG_07851 | CNAG_07944\n", "CNAG_00030 | CNAG_00057 | CNAG_00684 | CNAG_00827 | CNAG_01216 | CNAG_01395 | CNAG_01541 | CNAG_01675 | CNAG_01984 | CNAG_02133 | CNAG_02296 | CNAG_03048 | CNAG_03245 | CNAG_03335 | CNAG_03882 | CNAG_03916 | CNAG_04676 | CNAG_05365 | CNAG_05379 | CNAG_06313 | CNAG_06770 | CNAG_07445 | CNAG_07561\n" ] } ], "source": [ "head -n 3 \"$INFODIR/pathway_genes.txt\"" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "### Check if both contain same number of lines\n", "We need to make sure each pathway id match with one gene list" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1859\n" ] } ], "source": [ "cat $PATHFILE | tail -n +2 | cut -f 1 | grep '^\\w' | wc -l" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1859\n" ] } ], "source": [ "cat $PATHFILE | tail -n +2 | cut -f 4 | grep ^CNAG | wc -l" ] } ], "metadata": { "kernelspec": { "display_name": "Bash", "language": "bash", "name": "bash" }, "language_info": { "codemirror_mode": "shell", "file_extension": ".sh", "mimetype": "text/x-sh", "name": "bash" } }, "nbformat": 4, "nbformat_minor": 2 }