#!/usr/bin/env python
#
# Tutorial on using GHMM with Python
# http://ghmm.sourceforge.net/ghmm-python-tutorial.html
# Ported to python code for lazy programmers ;-)
#
# In the following, we assume that you have installed GHMM including the Python
# bindings. Download the UnfairCasino.py-file.
#
# You might have seen the unfair casino example (Chair Biological Sequence
# Analysis, Durbin et. al, 1998), where a dealer in a casino occasionally
# exchanges a fair dice with a loaded one. We do not know if and when this
# exchange happens, we only can observe the throws. This stochastic process we
# will model with a HMM.

import ghmm
print "For aditional help use: help('ghmm')"

#Creating the first model:

# There are two states in our example. Either the dice is fair (state 0; Python
# indexes arrays like C and C++ from 0) or it is loaded (state 1). We will
# define the transition and emission matrices explicitly.

# To save us some typing (namely ghmm. before any class or function from GHMM),
# we will import all symbols from ghmm directly.

from ghmm import *

# Our alphabet are the numbers on the dice {1,2,3,4,5,6}. To be consistent with
# Python's range function the upper limit is not part of a range.

sigma = IntegerRange(1,7)

# The transition matrix A is chosen such that we will stay longer in the fair
# than the loaded state. The emission probabilities are set accordingly in B ---
# guessing equal probabilities for the fair dice and higher probabilities for
# ones and twos etc. for the loaded dice --- and the initial state distribution
# pi has no preference for either state.

A = [[0.9, 0.1], [0.3, 0.7]]
efair = [1.0 / 6] * 6
print "Fair probabilities:", efair

eloaded = [3.0 / 13, 3.0 / 13, 2.0 / 13, 2.0 / 13, 2.0 / 13, 1.0 / 13]
B = [efair, eloaded]
pi = [0.5] * 2
m = HMMFromMatrices(sigma, DiscreteDistribution(sigma), A, B, pi)
print "Inital HHM", m

# Generating sequences

# At this time GHMM still exposes some internals. For example, symbols from
# discrete alphabets are internally represented as 0,...,|alphabet| -1 and the
# alphabet is responsible for translating between internal and external
# representations.

# First we sample from the model, specifying the length of the sequence, and
# then transfer the observations to the external representation.

obs_seq = m.sampleSingle(20)
print "Observation sequence : ", obs_seq
obs = map(sigma.external, obs_seq)
print "Observations         : ", obs

# Learning from data

# In a real application we would want to train the model. That is, we would
# like to estimate the parameters of the model from data with the goal to
# maximize the likelihood of that data given the model. This is done for HMMs
# with the Baum-Welch algorithm which is actually an instance of the well-known
# Expectation-Maximization procedure for missing data problems in statistics.
# The process is iterative and hence sometime called re-estimation.

# We can use m as a starting point and use some 'real' data to train it. The
# variable train_seq is an EmissionSequence or SequenceSet object.

 
from UnfairCasino import train_seq 
m.baumWelch(train_seq)
print "Trained using baumWelch and 'train_seq'"
print m

# If you want more fine-grained control over the learning procedure, you can do
# single steps and monitor the relevant diagnostics yourself, or employ
# meta-heuristics such as noise-injection to avoid getting stuck in local
# maxima. [Currently for continous emissions only] Computing a Viterbi-path

# A Viterbi-path is a sequence of states Q maximizing the probability
# P[Q|observations, model]. If we compute a Viterbi-path for a unfair casino
# sequence of throws, the state sequence tells us when the casino was fair
# (state 0) and when it was unfair (state 1).


from UnfairCasino import test_seq
v = m.viterbi(test_seq)
print "fairness of test_seq: ", v

# You can investigate how successful your model is at detecting cheating. Just
# create artificial observations and see if you can detect them.

my_seq = EmissionSequence(sigma, [1] * 20 + [6] * 10 + [1] * 40)
print "artificial sequence    : ", my_seq
print "artificial observation : ", m.viterbi(my_seq)

print '''
#
# XXX: Some error on closing data, save to ignore as we are finished anyways
#
'''

# EOF