{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Set environment"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"cleanup () { \n",
" :\n",
"}\n",
"\n",
"trap \"cleanup\" SIGPIPE"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"set -u"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"set directory"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"CURDIR=/home/jovyan/work/HTS2018\n",
"INFODIR=${CURDIR}/Info\n",
"PATHFILE=/home/jovyan/work/HTS-R25-DEV-2018/Info/PathwaysByGeneIds_Summary.txt"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"mkdir -p $INFODIR"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Read data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pathway Id\tMap - Painted With Transformed Genes (new window)\tPathway\tUnique Gene Count\tGenes\n",
"ec00010\t\"ec00010 (decorated)\"\"\"\tGlycolysis / Gluconeogenesis\t34\tCNAG_00038 | CNAG_00057 | CNAG_00515 | CNAG_00735 | CNAG_00797 | CNAG_01078 | CNAG_01120 | CNAG_01675 | CNAG_01820 | CNAG_01955 | CNAG_02035 | CNAG_02377 | CNAG_02489 | CNAG_02736 | CNAG_02903 | CNAG_03072 | CNAG_03358 | CNAG_03916 | CNAG_04217 | CNAG_04523 | CNAG_04659 | CNAG_04676 | CNAG_05059 | CNAG_05113 | CNAG_06035 | CNAG_06313 | CNAG_06628 | CNAG_06699 | CNAG_06770 | CNAG_07004 | CNAG_07316 | CNAG_07559 | CNAG_07660 | CNAG_07745\t\n",
"ec00020\t\" $INFODIR/pathway_names.txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### gene list"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"cat $PATHFILE | tail -n +2 | cut -f 4 | grep '^CNAG' > $INFODIR/pathway_genes.txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### check if the files are created\n",
"make sure the file size is not zero"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-rw-r--r-- 1 jovyan users 425126 Jul 17 16:48 /home/jovyan/work/HTS2018/Info/pathway_genes.txt\n",
"-rw-r--r-- 1 jovyan users 18144 Jul 17 16:48 /home/jovyan/work/HTS2018/Info/pathway_names.txt\n"
]
}
],
"source": [
"ls -l $INFODIR/pathway_*"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ec00010\n",
"ec00020\n",
"ec00030\n"
]
}
],
"source": [
"head -3 $INFODIR/pathway_names.txt"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CNAG_00038 | CNAG_00057 | CNAG_00515 | CNAG_00735 | CNAG_00797 | CNAG_01078 | CNAG_01120 | CNAG_01675 | CNAG_01820 | CNAG_01955 | CNAG_02035 | CNAG_02377 | CNAG_02489 | CNAG_02736 | CNAG_02903 | CNAG_03072 | CNAG_03358 | CNAG_03916 | CNAG_04217 | CNAG_04523 | CNAG_04659 | CNAG_04676 | CNAG_05059 | CNAG_05113 | CNAG_06035 | CNAG_06313 | CNAG_06628 | CNAG_06699 | CNAG_06770 | CNAG_07004 | CNAG_07316 | CNAG_07559 | CNAG_07660 | CNAG_07745\n",
"CNAG_00061 | CNAG_00747 | CNAG_01120 | CNAG_01264 | CNAG_01657 | CNAG_01680 | CNAG_02736 | CNAG_03225 | CNAG_03226 | CNAG_03266 | CNAG_03375 | CNAG_03596 | CNAG_03674 | CNAG_03920 | CNAG_04189 | CNAG_04217 | CNAG_04468 | CNAG_04535 | CNAG_04640 | CNAG_05059 | CNAG_05236 | CNAG_05907 | CNAG_07004 | CNAG_07356 | CNAG_07363 | CNAG_07660 | CNAG_07851 | CNAG_07944\n",
"CNAG_00030 | CNAG_00057 | CNAG_00684 | CNAG_00827 | CNAG_01216 | CNAG_01395 | CNAG_01541 | CNAG_01675 | CNAG_01984 | CNAG_02133 | CNAG_02296 | CNAG_03048 | CNAG_03245 | CNAG_03335 | CNAG_03882 | CNAG_03916 | CNAG_04676 | CNAG_05365 | CNAG_05379 | CNAG_06313 | CNAG_06770 | CNAG_07445 | CNAG_07561\n"
]
}
],
"source": [
"head -3 $INFODIR/pathway_genes.txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Check if both contain same number of lines\n",
"We need to make sure each pathway id match with one gene list"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1859\n"
]
}
],
"source": [
"cat $PATHFILE | tail -n +2 | cut -f 1 | grep '^\\w' | wc -l"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1859\n"
]
}
],
"source": [
"cat $PATHFILE | tail -n +2 | cut -f 4 | grep ^CNAG | wc -l"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Bash",
"language": "bash",
"name": "bash"
},
"language_info": {
"codemirror_mode": "shell",
"file_extension": ".sh",
"mimetype": "text/x-sh",
"name": "bash"
}
},
"nbformat": 4,
"nbformat_minor": 2
}