Probabilistic Graphical Models with pgmpy
# Install pgmpy into the notebook's environment (the '!' prefix is an
# IPython shell escape, so this runs pip as a shell command).
!pip install pgmpy
Collecting pgmpy Downloading pgmpy-0.1.2.tar.gz (147kB) [K 100% |████████████████████████████████| 153kB 5.9MB/s [?25hRequirement already satisfied (use --upgrade to upgrade): networkx>=1.8.1 in /opt/conda/lib/python3.5/site-packages (from pgmpy) Requirement already satisfied (use --upgrade to upgrade): scipy>=0.12.1 in /opt/conda/lib/python3.5/site-packages (from pgmpy) Requirement already satisfied (use --upgrade to upgrade): numpy>=1.7.0 in /opt/conda/lib/python3.5/site-packages (from pgmpy) Requirement already satisfied (use --upgrade to upgrade): nose>=1.3.0 in /opt/conda/lib/python3.5/site-packages (from pgmpy) Collecting coveralls>=0.4 (from pgmpy) Downloading coveralls-1.1-py2.py3-none-any.whl Requirement already satisfied (use --upgrade to upgrade): decorator>=3.4.0 in /opt/conda/lib/python3.5/site-packages (from networkx>=1.8.1->pgmpy) Collecting docopt>=0.6.1 (from coveralls>=0.4->pgmpy) Downloading docopt-0.6.2.tar.gz Collecting coverage>=3.6 (from coveralls>=0.4->pgmpy) Downloading coverage-4.3.4-cp35-cp35m-manylinux1_x86_64.whl (191kB) [K 100% |████████████████████████████████| 194kB 4.7MB/s [?25hRequirement already satisfied (use --upgrade to upgrade): requests>=1.0.0 in /opt/conda/lib/python3.5/site-packages (from coveralls>=0.4->pgmpy) Building wheels for collected packages: pgmpy, docopt Running setup.py bdist_wheel for pgmpy ... [?25l- | done [?25h Stored in directory: /home/jovyan/.cache/pip/wheels/d3/21/0f/5b1fc282ee2ab16b693c1a0ed9cb8fde44dbaa28d907c90ff4 Running setup.py bdist_wheel for docopt ... [?25l- done [?25h Stored in directory: /home/jovyan/.cache/pip/wheels/b2/16/5f/c33a2bb5f2dce71205f8e65cbfd05647d79d441282be31fd82 Successfully built pgmpy docopt Installing collected packages: docopt, coverage, coveralls, pgmpy Successfully installed coverage-4.3.4 coveralls-1.1 docopt-0.6.2 pgmpy-0.1.2 [33mYou are using pip version 8.1.2, however version 9.0.1 is available. 
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
from pgmpy.factors import TabularCPD

# Conditional probability table P(G | I, D): grade G has 3 states,
# conditioned on intelligence I and difficulty D (2 states each).
# Each row is one grade level; each column is one (I, D) assignment.
grade_values = [
    [0.3, 0.05, 0.9, 0.5],
    [0.4, 0.25, 0.08, 0.3],
    [0.3, 0.7, 0.02, 0.2],
]
grade_cpd = TabularCPD(
    variable="G",
    variable_card=3,
    values=grade_values,
    evidence=["I", "D"],
    evidence_card=[2, 2],
)
# Bare expression so the notebook renders the CPD table.
grade_cpd
D | D_0 | D_0 | D_1 | D_1 |
I | I_0 | I_1 | I_0 | I_1 |
G_0 | 0.3000 | 0.0500 | 0.9000 | 0.5000 |
G_1 | 0.4000 | 0.2500 | 0.0800 | 0.3000 |
G_2 | 0.3000 | 0.7000 | 0.0200 | 0.2000 |
# Declare the student model in pgmpy
from pgmpy.models import BayesianModel
from pgmpy.factors import TabularCPD

# Edges point parent -> child: G depends on D and I,
# L depends on G, and S depends on I.
student_edges = [("D", "G"), ("I", "G"), ("G", "L"), ("I", "S")]
student_model = BayesianModel(student_edges)
# Define one CPD per node, then attach them all to the model.

# P(G | I, D): three grade levels conditioned on intelligence and difficulty.
grade_cpd = TabularCPD(
    variable="G", variable_card=3,
    values=[[0.3, 0.05, 0.9, 0.5],
            [0.4, 0.25, 0.08, 0.3],
            [0.3, 0.7, 0.02, 0.2]],
    evidence=["I", "D"], evidence_card=[2, 2])

# P(D): prior over course difficulty.
difficulty_cpd = TabularCPD(
    variable="D", variable_card=2,
    values=[[0.6, 0.4]])

# P(I): prior over student intelligence.
intel_cpd = TabularCPD(
    variable="I", variable_card=2,
    values=[[0.7, 0.3]])

# P(L | G): recommendation-letter quality given the grade.
letter_cpd = TabularCPD(
    variable="L", variable_card=2,
    values=[[0.1, 0.4, 0.99],
            [0.9, 0.6, 0.01]],
    evidence=["G"], evidence_card=[3])

# P(S | I): SAT result given intelligence.
sat_cpd = TabularCPD(
    variable="S", variable_card=2,
    values=[[0.95, 0.2],
            [0.05, 0.8]],
    evidence=["I"], evidence_card=[2])

# Register every CPD with its node in the network.
student_model.add_cpds(grade_cpd, difficulty_cpd, intel_cpd,
                       letter_cpd, sat_cpd)
# Display the grade CPD (the notebook echoes the last bare expression).
grade_cpd
D | D_0 | D_0 | D_1 | D_1 |
I | I_0 | I_1 | I_0 | I_1 |
G_0 | 0.3000 | 0.0500 | 0.9000 | 0.5000 |
G_1 | 0.4000 | 0.2500 | 0.0800 | 0.3000 |
G_2 | 0.3000 | 0.7000 | 0.0200 | 0.2000 |
# Retrieve the CPD the model has attached to node G — should match grade_cpd.
student_model.get_cpds('G')
D | D_0 | D_0 | D_1 | D_1 |
I | I_0 | I_1 | I_0 | I_1 |
G_0 | 0.3000 | 0.0500 | 0.9000 | 0.5000 |
G_1 | 0.4000 | 0.2500 | 0.0800 | 0.3000 |
G_2 | 0.3000 | 0.7000 | 0.0200 | 0.2000 |
# Parents of G in the DAG (the conditioning variables of its CPD).
student_model.get_parents('G')
['D', 'I']
from pgmpy.inference import VariableElimination

# Exact inference over the student network via variable elimination.
student_infer = VariableElimination(student_model)
# `variables` expects a LIST of variable names. The original passed the
# bare string 'G', which only worked by accident because iterating "G"
# yields ["G"]; a multi-character name would have broken it.
prob_G = student_infer.query(variables=['G'])
# query() returns a dict mapping each queried variable to its factor.
print(prob_G['G'])
╒═════╤══════════╕
│ G │ phi(G) │
╞═════╪══════════╡
│ G_0 │ 0.4470 │
├─────┼──────────┤
│ G_1 │ 0.2714 │
├─────┼──────────┤
│ G_2 │ 0.2816 │
╘═════╧══════════╛
# Posterior over G for a smart student (I=1) in an easy class (D=0).
# `variables` takes a list; the original's bare string 'G' worked only
# because iterating it yields ["G"].
prob_G = student_infer.query(variables=['G'], evidence={'I': 1, 'D': 0})
print(prob_G['G'])
╒═════╤══════════╕
│ G │ phi(G) │
╞═════╪══════════╡
│ G_0 │ 0.0500 │
├─────┼──────────┤
│ G_1 │ 0.2500 │
├─────┼──────────┤
│ G_2 │ 0.7000 │
╘═════╧══════════╛
# Posterior over G for a less able student (I=0) in a hard class (D=1).
# `variables` takes a list; the original's bare string 'G' worked only
# because iterating it yields ["G"].
prob_G = student_infer.query(variables=['G'], evidence={'I': 0, 'D': 1})
print(prob_G['G'])
╒═════╤══════════╕
│ G │ phi(G) │
╞═════╪══════════╡
│ G_0 │ 0.9000 │
├─────┼──────────┤
│ G_1 │ 0.0800 │
├─────┼──────────┤
│ G_2 │ 0.0200 │
╘═════╧══════════╛
#Train Model from Data
from pgmpy.models import BayesianModel
import pandas as pd
import numpy as np

# Each of the five variables has exactly two states, so uniform random
# 0/1 draws are enough to exercise parameter learning.
raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
data = pd.DataFrame(raw_data, columns=["D", "I", "G", "L", "S"])

# Use the first 75% of rows for training; show what the model is fit on.
split = int(data.shape[0] * 0.75)
data_train = data[:split]
print(data_train)

# Same DAG as before, rebuilt from scratch for the learning run.
student_model = BayesianModel([("D", "G"), ("I", "G"),
                               ("I", "S"), ("G", "L")])
student_model.fit(data_train)  # maximum-likelihood estimation of all CPDs
student_model.get_cpds('D')
D I G L S
0 0 1 1 1 1
1 0 0 0 0 1
2 0 1 1 0 0
3 0 0 0 0 1
4 1 1 0 1 1
5 1 0 0 0 0
6 1 1 0 1 1
7 1 0 0 0 1
8 1 1 0 0 1
9 1 0 0 0 0
10 1 1 1 1 0
11 1 1 0 0 0
12 1 1 1 1 0
13 1 0 0 1 1
14 0 1 1 1 1
15 1 0 0 1 1
16 1 1 0 1 1
17 0 0 0 1 0
18 0 0 0 0 0
19 1 1 1 1 1
20 0 0 0 1 1
21 0 0 0 0 0
22 0 0 1 0 0
23 0 0 0 1 0
24 0 0 1 1 1
25 1 0 1 1 1
26 0 1 1 1 0
27 0 1 0 0 1
28 1 1 0 0 1
29 1 0 0 0 0
.. .. .. .. .. ..
720 1 1 1 0 1
721 0 0 1 1 1
722 1 1 1 0 1
723 1 0 0 0 0
724 1 0 0 1 1
725 0 0 1 1 1
726 0 0 1 0 1
727 0 0 1 0 0
728 1 1 1 0 0
729 1 1 0 0 0
730 0 0 0 0 0
731 1 1 1 0 0
732 0 0 1 0 1
733 1 1 0 1 0
734 0 1 0 1 1
735 0 1 1 1 0
736 1 0 1 0 0
737 1 1 1 1 0
738 1 1 1 1 1
739 0 1 1 0 0
740 1 1 1 1 1
741 1 0 1 1 0
742 0 1 1 0 1
743 1 0 1 1 0
744 0 1 1 1 1
745 1 0 0 0 1
746 1 0 0 1 0
747 0 1 1 0 1
748 1 0 0 1 1
749 0 0 1 1 1
[750 rows x 5 columns]
D_0 | 0.4400 |
D_1 | 0.5600 |
# CPD learned for L from the training data.
student_model.get_cpds('L')
G | G_0 | G_1 |
L_0 | 0.4545 | 0.5000 |
L_1 | 0.5455 | 0.5000 |
# Nodes reachable from D along active (unblocked) trails, given no evidence.
student_model.active_trail_nodes('D')
{'D', 'G', 'L'}
# Local Markov independencies of G: G is independent of its
# non-descendants given its parents.
student_model.local_independencies('G')
(G _|_ S | D, I)
# All conditional-independence assertions implied by the model structure.
student_model.get_independencies()
(G _|_ L, I, S | D)
(G _|_ L, I, D | S)
(G _|_ I, S, D | L)
(G _|_ L, D | I)
(D _|_ I, S | G)
(D _|_ G, L | S)
(D _|_ G, I, S | L)
(D _|_ G, L | I)
(S _|_ I, D | G)
(S _|_ G, I, L | D)
(S _|_ G, I, D | L)
(L _|_ G, I, S | D)
(L _|_ G, I, D | S)
(L _|_ G, D | I)
(I _|_ D, S | G)
(I _|_ G, S, L | D)
(I _|_ G, L | S)
(I _|_ G, D, S | L)
# Hold out the last 25% of rows, drop the target column G, and predict it.
# Take an explicit copy first: calling drop(..., inplace=True) on a
# slice of `data` operates on a view and triggers pandas'
# SettingWithCopyWarning (visible in the original run's output), with no
# guarantee the drop takes effect.
data_test = data[int(0.75 * data.shape[0]) : data.shape[0]].copy()
data_test.drop('G', axis=1, inplace=True)
student_model.predict(data_test)
/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
app.launch_new_instance()
G | |
---|---|
750 | 1 |
751 | 0 |
752 | 0 |
753 | 0 |
754 | 0 |
755 | 0 |
756 | 0 |
757 | 1 |
758 | 0 |
759 | 1 |
760 | 0 |
761 | 0 |
762 | 1 |
763 | 1 |
764 | 0 |
765 | 1 |
766 | 1 |
767 | 0 |
768 | 0 |
769 | 1 |
770 | 1 |
771 | 1 |
772 | 0 |
773 | 1 |
774 | 1 |
775 | 0 |
776 | 1 |
777 | 1 |
778 | 0 |
779 | 0 |
... | ... |
970 | 1 |
971 | 0 |
972 | 1 |
973 | 1 |
974 | 1 |
975 | 0 |
976 | 1 |
977 | 0 |
978 | 1 |
979 | 1 |
980 | 1 |
981 | 1 |
982 | 1 |
983 | 0 |
984 | 1 |
985 | 1 |
986 | 1 |
987 | 0 |
988 | 0 |
989 | 1 |
990 | 0 |
991 | 0 |
992 | 0 |
993 | 0 |
994 | 0 |
995 | 1 |
996 | 1 |
997 | 1 |
998 | 0 |
999 | 0 |
250 rows × 1 columns