#!/usr/bin/env python # # Tutorial on using GHMM with Python # http://ghmm.sourceforge.net/ghmm-python-tutorial.html # Ported to python code for lazy programmers ;-) # # In the following, we assume that you have installed GHMM including the Python # bindings. Download the UnfairCasino.py-file. # # You might have seen the unfair casino example (Chair Biological Sequence # Analysis, Durbin et. al, 1998), where a dealer in a casino occasionally # exchanges a fair dice with a loaded one. We do not know if and when this # exchange happens, we only can observe the throws. This stochastic process we # will model with a HMM. import ghmm print "For aditional help use: help('ghmm')" #Creating the first model: # There are two states in our example. Either the dice is fair (state 0; Python # indexes arrays like C and C++ from 0) or it is loaded (state 1). We will # define the transition and emission matrices explicitly. # To save us some typing (namely ghmm. before any class or function from GHMM), # we will import all symbols from ghmm directly. from ghmm import * # Our alphabet are the numbers on the dice {1,2,3,4,5,6}. To be consistent with # Python's range function the upper limit is not part of a range. sigma = IntegerRange(1,7) # The transition matrix A is chosen such that we will stay longer in the fair # than the loaded state. The emission probabilities are set accordingly in B --- # guessing equal probabilities for the fair dice and higher probabilities for # ones and twos etc. for the loaded dice --- and the initial state distribution # pi has no preference for either state. A = [[0.9, 0.1], [0.3, 0.7]] efair = [1.0 / 6] * 6 print "Fair probabilities:", efair eloaded = [3.0 / 13, 3.0 / 13, 2.0 / 13, 2.0 / 13, 2.0 / 13, 1.0 / 13] B = [efair, eloaded] pi = [0.5] * 2 m = HMMFromMatrices(sigma, DiscreteDistribution(sigma), A, B, pi) print "Inital HHM", m # Generating sequences # At this time GHMM still exposes some internals. For example, symbols from # discrete alphabets are internally represented as 0,...,|alphabet| -1 and the # alphabet is responsible for translating between internal and external # representations. # First we sample from the model, specifying the length of the sequence, and # then transfer the observations to the external representation. obs_seq = m.sampleSingle(20) print "Observation sequence : ", obs_seq obs = map(sigma.external, obs_seq) print "Observations : ", obs # Learning from data # In a real application we would want to train the model. That is, we would # like to estimate the parameters of the model from data with the goal to # maximize the likelihood of that data given the model. This is done for HMMs # with the Baum-Welch algorithm which is actually an instance of the well-known # Expectation-Maximization procedure for missing data problems in statistics. # The process is iterative and hence sometime called re-estimation. # We can use m as a starting point and use some 'real' data to train it. The # variable train_seq is an EmissionSequence or SequenceSet object. from UnfairCasino import train_seq m.baumWelch(train_seq) print "Trained using baumWelch and 'train_seq'" print m # If you want more fine-grained control over the learning procedure, you can do # single steps and monitor the relevant diagnostics yourself, or employ # meta-heuristics such as noise-injection to avoid getting stuck in local # maxima. [Currently for continous emissions only] Computing a Viterbi-path # A Viterbi-path is a sequence of states Q maximizing the probability # P[Q|observations, model]. If we compute a Viterbi-path for a unfair casino # sequence of throws, the state sequence tells us when the casino was fair # (state 0) and when it was unfair (state 1). from UnfairCasino import test_seq v = m.viterbi(test_seq) print "fairness of test_seq: ", v # You can investigate how successful your model is at detecting cheating. Just # create artificial observations and see if you can detect them. my_seq = EmissionSequence(sigma, [1] * 20 + [6] * 10 + [1] * 40) print "artificial sequence : ", my_seq print "artificial observation : ", m.viterbi(my_seq) print ''' # # XXX: Some error on closing data, save to ignore as we are finished anyways # ''' # EOF