

> {-# LANGUAGE NoMonomorphismRestriction,GeneralizedNewtypeDeriving #-}







> import Control.Arrow

> import Control.Monad

> import Control.Monad.Instances

> import Control.Monad.State

> import Data.Either

> import Data.Function

> import Random

> import qualified Data.List as L

> import qualified Data.Map as M





Search Trees



> -- | A weighted search tree: every node carries a cost lower bound 'lb'
> -- (minus the log of a probability), and leaves carry the payload.
> data Search c a
>     = Leaf   { lb :: c, leaf :: a }
>     | Choice { lb :: c, choices :: [Search c a] }
>     deriving Show





lb



> -- | The example tree from the article.  Weights are minus-log
> -- probabilities, so the likeliest leaf ('C', reached with probability
> -- 0.2 * 0.6) has the smallest total weight.
> ex1 = Choice 0
>     [ Choice (-log 0.1)
>         [ Leaf (-log 0.5) 'A'
>         , Leaf (-log 0.5) 'B' ]
>     , Choice (-log 0.2)
>         [ Leaf (-log 0.6) 'C'
>         , Leaf (-log 0.4) 'D' ] ]





Functor



> -- | Mapping over the payloads leaves every weight untouched.
> instance Functor (Search c) where
>     fmap f (Leaf c a)    = Leaf c (f a)
>     fmap f (Choice c as) = Choice c (map (fmap f) as)





>>=



> -- | Bind maps each payload to a whole subtree, then grafts those
> -- subtrees in, keeping the weight already paid to reach each graft
> -- point.  'return' is a free (zero-weight) leaf.
> instance Num c => Monad (Search c) where
>     return = Leaf 0
>     m >>= f = graft (fmap f m)
>       where
>         graft (Leaf c t)    = Choice c [t]
>         graft (Choice c ts) = Choice c (map graft ts)





MonadPlus

MonadPlus



> -- | Alternation grafts both subtrees under a fresh zero-weight root.
> -- As the article notes, this is only associative up to search-tree
> -- equivalence, not syntactically.
> instance Num c => MonadPlus (Search c) where
>     mzero = Choice 0 []
>     mplus a b = Choice 0 [a, b]







> data Ord a => Tree a = Null | Fork a (Tree a) (Tree a) deriving Show



> -- | True only for the empty heap.
> isEmpty :: Ord a => Tree a -> Bool
> isEmpty Null = True
> isEmpty _    = False



> -- | The smallest element sits at the root.  Partial: undefined on
> -- 'Null', matching the original; callers are expected to check
> -- 'isEmpty' (or pattern-match 'Null') first.
> minElem :: Ord a => Tree a -> a
> minElem (Fork x _ _) = x



> -- | Remove the root and merge its two children.  Partial on 'Null',
> -- as in the original.
> deleteMin :: Ord a => Tree a -> Tree a
> deleteMin (Fork _ l r) = merge l r



> -- | Insert by merging a singleton heap into the existing one.
> insert :: Ord a => a -> Tree a -> Tree a
> insert x heap = merge (Fork x Null Null) heap



> -- | Skew-heap merge: the heap with the smaller root keeps the top spot
> -- and the other heap is pushed below it by 'connect'.
> merge :: Ord a => Tree a -> Tree a -> Tree a
> merge l Null = l
> merge Null r = r
> merge l r
>     | minElem l <= minElem r = connect l r
>     | otherwise              = connect r l



> connect (Fork x a b) c = Fork x b (merge a c)







> -- | Trees compare by their lower bound only.  (Relies on Haskell 98's
> -- 'Num' implying 'Eq', consistent with the rest of this file.)
> instance (Num c) => Eq (Search c a) where
>     s == t = lb s == lb t



> -- | Order by lower bound, so the priority queue always pops the
> -- cheapest unexplored subtree first.
> instance (Num c,Ord c) => Ord (Search c a) where
>     compare s t = compare (lb s) (lb t)





bumpUp



> -- | Add the cost of the journey taken so far onto a subtree's lower
> -- bound, so queue entries are comparable by total path weight.
> -- Fix: type signature added for consistency with the other top-level
> -- bindings (it matches the previously inferred type).
> bumpUp :: Num c => c -> Search c a -> Search c a
> bumpUp delta (Leaf c a)    = Leaf (delta + c) a
> bumpUp delta (Choice c as) = Choice (delta + c) as







> -- | Best-first (Dijkstra-style) search over a queue of subtrees.
> -- Pops the cheapest subtree; a leaf yields its payload as 'Right',
> -- while a choice re-inserts its children with bounds bumped by the
> -- cost paid to reach them.  Every step also emits a 'Left c' progress
> -- report so a caller of a possibly infinite search can bail out once
> -- the cost bound passes a threshold.
> runSearch :: (Num c,Ord c) => Tree (Search c a) -> [Either c a]
> runSearch Null  = []
> runSearch queue = case minElem queue of
>     Leaf c a    -> Left c : Right a : runSearch rest
>     Choice c as ->
>         Left c : runSearch (foldl (flip insert) rest (map (bumpUp c) as))
>   where
>     rest = deleteMin queue







> integers m = Choice 1 [Leaf 0 m,integers (m+1)]



> -- | Brute-force search for Pythagorean triples over the infinite tree
> -- of integers.
> test = do
>     a <- integers 1
>     b <- integers 1
>     c <- integers 1
>     guard (a*a + b*b == c*c)
>     return (a, b, c)



> test1 = runSearch (insert test Null)





Left w



> -- | Compress runs of cost updates, keeping only the last (tightest)
> -- bound before each result, and convert the surviving weights back to
> -- probabilities via @exp . negate@.
> -- Equation order matters: the two-'Left' case must come first.
> reduce [] = []
> reduce (Left _  : rest@(Left _ : _)) = reduce rest
> reduce (Left a  : rest) = Left (exp (-a)) : reduce rest
> reduce (Right a : rest) = Right a : reduce rest







> test2 = reduce test1





Grammar



> -- Parts of speech for the toy grammar; each wraps its surface string.
> data Noun = Noun String deriving (Show,Eq,Ord)
> data Verb = Verb String deriving (Show,Eq,Ord)
> data Adj = Adj String deriving (Show,Eq,Ord)
> data Prep = Prep String deriving (Show,Eq,Ord)







> -- Noun phrase: zero or more adjectives followed by a noun.
> data NP = NP [Adj] Noun deriving (Show,Eq,Ord)
> -- Prepositional phrase: a preposition followed by a noun.
> data PP = PP Prep Noun deriving (Show,Eq,Ord)







> data Sentence = Sentence [NP] Verb [NP] [PP] deriving (Show,Eq,Ord)







> -- | Recover the underlying string from a parsed term.
> class UnParse a where
>     unParse :: a -> String



> -- The word-level wrappers unparse to the string they carry.
> instance UnParse Noun where
>     unParse (Noun s) = s
>
> instance UnParse Verb where
>     unParse (Verb s) = s
>
> instance UnParse Adj where
>     unParse (Adj s) = s
>
> instance UnParse Prep where
>     unParse (Prep s) = s



> -- Phrases and sentences unparse by straight concatenation -- the toy
> -- "ab" language has no separators, which is what makes it ambiguous.
> instance UnParse NP where
>     unParse (NP adjs n) = concatMap unParse adjs ++ unParse n
>
> instance UnParse PP where
>     unParse (PP p n) = unParse p ++ unParse n
>
> instance UnParse Sentence where
>     unParse (Sentence subj v obj pps) =
>         concatMap unParse subj ++ unParse v
>             ++ concatMap unParse obj ++ concatMap unParse pps







> -- | An abstract grammar interface: terminals of one character, plus a
> -- probability-weighted choice between alternatives.  One instance
> -- below generates random strings; another parses them.
> class Transducer t where
>     char   :: Char -> t Char
>     choose :: [(Float, t a)] -> t a





Monad

Transducer



> -- | Match (or emit) a literal string one character at a time.
> string :: (Monad t, Transducer t) => [Char] -> t [Char]
> string ""     = return ""
> string (c:cs) = char c >> string cs >> return (c:cs)





ab

ba



> -- | A noun is "ab" or "ba", each with probability 1/2.
> noun :: (Monad t, Transducer t) => t Noun
> noun = liftM Noun (choose [(0.5, string "ab"), (0.5, string "ba")])



> -- | A verb is "aa" or "b", each with probability 1/2.
> verb :: (Monad t, Transducer t) => t Verb
> verb = liftM Verb (choose [(0.5, string "aa"), (0.5, string "b")])



> -- | An adjective is "ab" or "aa", each with probability 1/2.
> adjective :: (Monad t, Transducer t) => t Adj
> adjective = liftM Adj (choose [(0.5, string "ab"), (0.5, string "aa")])



> -- | A preposition is "a" or "b", each with probability 1/2.
> -- Fix: type signature added for consistency with 'noun', 'verb' and
> -- 'adjective' (it matches the previously inferred type).
> parsePrep :: (Monad t, Transducer t) => t Prep
> parsePrep = do
>     a <- choose [(0.5, string "a"), (0.5, string "b")]
>     return $ Prep a







> -- | Zero or more items with a geometric length distribution: stop with
> -- probability @p@, otherwise take one item and recurse.
> many :: (Monad t, Transducer t) => Float -> t a -> t [a]
> many p t = choose
>     [ (p, return [])
>     , (1 - p, do
>         x  <- t
>         xs <- many p t
>         return (x : xs)) ]







> -- | One item followed by geometrically many more.
> -- Fix: type signature added for consistency with 'many' (it matches
> -- the previously inferred type).
> many1 :: (Monad t, Transducer t) => Float -> t a -> t [a]
> many1 p t = do
>     a  <- t
>     as <- many p t
>     return (a : as)







> -- | Noun phrase: geometrically many adjectives, then a noun.
> parseNP = do
>     adjs <- many 0.5 adjective
>     n    <- noun
>     return (NP adjs n)



> -- | Prepositional phrase: a preposition, then a noun.
> parsePP = do
>     p <- parsePrep
>     n <- noun
>     return (PP p n)



> -- | A full sentence: subject NPs, a verb, object NPs, then PPs, all
> -- with geometric length distributions.
> sentence = do
>     subj <- many 0.5 parseNP
>     v    <- verb
>     obj  <- many 0.5 parseNP
>     pps  <- many 0.5 parsePP
>     return (Sentence subj v obj pps)







> -- 'Generator' runs the grammar forwards, sampling random strings from
> -- a threaded 'StdGen'; 'Parser' runs it backwards over an input
> -- string, producing a weighted 'Search' tree of (result, rest) pairs.
> newtype Generator a = Generator { unGen :: State StdGen a } deriving Monad
> newtype Parser a = Parser { runParse :: (String -> Search Float (a,String)) }







> -- | Use the grammar as a production system: 'char' simply emits its
> -- character, and 'choose' samples an alternative according to the
> -- given probabilities by drawing a random number and scanning the
> -- cumulative weights.
> instance Transducer Generator where
>     char a = return a
>     choose p = do
>         r <- Generator (State random)
>         let weighted = zip (scanl1 (+) (map fst p)) (map snd p)
>         case L.find ((>= r) . fst) weighted of
>             Just opt -> snd opt
>             -- Fix: the original had no 'Nothing' branch and died with a
>             -- pattern-match failure whenever floating-point rounding
>             -- left the cumulative total just below r; fall back to the
>             -- last alternative instead.
>             Nothing  -> snd (last weighted)







> -- Fixed seed so runs are repeatable.
> gen = mkStdGen 12343210
>
> -- | Draw @n@ independent samples of a part of speech.
> generate n partOfSpeech = evalState (unGen (replicateM n partOfSpeech)) gen



> test3 = mapM_ print $ generate 10 sentence







> -- | Monte Carlo ground truth: generate @n@ random sentences and keep
> -- the ones whose unparse equals @chars@.
> -- NOTE(review): the @partOfSpeech@ argument is ignored -- the body
> -- hard-codes 'sentence'.  Left as-is because the call sites ('test4',
> -- 'main') pass arguments annotated at 'Parser' types, which only
> -- typecheck while this argument is unused; using it here would
> -- require changing those annotations to 'Generator' at the same time.
> generateAndTest n partOfSpeech chars = do
>     a <- generate n sentence
>     guard (unParse a == chars)
>     return a



> -- | Histogram of the matching parses: each hit counts 1.
> collectResults n partOfSpeech chars =
>     M.fromListWith (+) [ (a, 1) | a <- generateAndTest n partOfSpeech chars ]
>
> -- | Print the histogram, most frequent parse first.
> countResults n partOfSpeech chars =
>     mapM_ print
>         $ L.sortBy (flip compare `on` snd)
>         $ M.toList
>         $ collectResults n partOfSpeech chars



> test4 = countResults 100000 (noun :: Parser Noun) "abab"







> -- | A state-threading parser monad, with the weighted 'Search' tree
> -- underneath supplying both nondeterminism and costs.
> instance Monad Parser where
>     return a = Parser (\cs -> return (a,cs))
>     p >>= f = Parser (\cs ->
>         runParse p cs >>= \(a, rest) -> runParse (f a) rest)



> -- | Failure and alternation lift straight through to the underlying
> -- search tree's 'MonadPlus'.
> instance MonadPlus Parser where
>     mzero = Parser (const mzero)
>     mplus p q = Parser (\cs -> runParse p cs `mplus` runParse q cs)



> -- | Use the grammar as a parser: 'char' consumes one matching input
> -- character, and 'choose' tries every alternative, charging each
> -- branch the negative log of its probability as search-tree weight.
> -- ('foldl1' is partial on an empty alternative list, as before.)
> instance Transducer Parser where
>     char c = Parser match where
>         match ""     = mzero
>         match (a:as)
>             | a == c    = return (a, as)
>             | otherwise = mzero
>     choose alts = foldl1 mplus [ prob w >> t | (w, t) <- alts ] where
>         prob w = Parser (\cs -> Leaf (-log w) ((), cs))



> goParse (Parser f) x = runSearch $ insert (f x) Null



> end = Parser (\cs -> if cs=="" then return ((),"") else mzero)



> -- | Require the whole input to be consumed by the given parser.
> withEnd g = do
>     result <- g
>     end
>     return result



> -- | Divide every reported weight by the final (total) weight, turning
> -- the 'Left' entries into relative probabilities.  Only meaningful on
> -- finite result lists: 'last' diverges on an infinite search.
> normalise results = map scale results
>   where
>     total = last (lefts results)
>     scale (Left a)  = Left (a / total)
>     scale (Right b) = Right b



> -- | Exact parsing: search the weighted tree of complete-sentence
> -- parses of @chars@, printing probability bounds and parses in
> -- best-first order.
> findParse chars =
>     mapM_ print
>         $ reduce
>         $ runSearch
>         $ insert (runParse (withEnd sentence) chars) Null





Results



> -- | Parse one example string exactly, then estimate the same parse
> -- distribution by Monte Carlo for comparison.
> main = do
>     -- Renamed from 'string' to avoid shadowing the top-level parser
>     -- combinator of the same name.
>     let input = "ababbbab"
>     findParse input
>     print "-------------------"
>     -- NOTE(review): 'Parser Sentence' only typechecks because
>     -- 'generateAndTest' ignores this argument; presumably 'Generator'
>     -- was intended.
>     countResults 1000000 (sentence :: Parser Sentence) input





Left p

string

I have three goals in this post:1. Refactoring the technique in my previous post so that building the search tree is entirely separate from searching the tree.2. Making it work with real-valued weights, not just integers3. Applying it to an ambiguous parsing problem, making use of a type class to define an abstract grammar.The idea is that I want to search a tree of possibilities where each edge of the tree is marked with a weight. The goal will be to search for leaves that minimise the sum of the weights of the edges down to the leaf.Here's an example tree:The minimum weight leaf is at C. If we're working with probabilities then we'll use minus the log of the probability of a branch as the weight. That way multiplication of probabilities becomes additions of weights, and the likeliest leaf has the minimum weight path.So here's the definition of a search tree. I've given both leaves and edges weights:(Compare with this .)is short for 'lower bound'. It provides a lower bound for the total weight of any option in this subtree (assuming non-negative weights). The tree in the diagram would look like:This tree is a container in a straightforward way and so we can make it an instance ofBut it's also a monad.maps all of the elements of a tree to trees in their own right, and then grafts those trees into the parent tree:It's easy to make trees into aby simply grafting trees into a new root.is meant to be a monoid, but this operation, as written below, isn't precisely associative. But it's 'morally' associative in that two terms that are meant to be equal describe equivalent search trees. So I'm not going to lose any sleep over it:For our searching we'll need a priority queue. I'll use a skew tree with code I lifted from somewhere I've forgotten:At each stage in the search we'll pick the unexplored branch with the lowest total weight so far. So when we compare trees we'll compare on their lower bounds. 
So we need an ordering on the trees as follows:The real cost of a choice isn't just the weight immediately visible in the tree but the cost of the journey you took to get there. We use thefunction to put that extra cost into the part of the tree we're currently looking at:The only tricky aspect to this code is that we need to be able to handle infinite trees. We can't have our code simply go off and return when it's found the next match because it might not be possible to do so in a finite time. Instead, the code needs to perform one operation at a time and report what it found at each point, even if that report is just stalling for time. We do this by returning a (possibly infinite) list containing elements that are either (1) the next item found or (2) a new update giving more information about the lower bound of the cost of any item that might be yet to come. This allows the caller to bail out of the search once the cost has passed a certain threshold.(Returning a useless looking constructor to stall for time is a common design pattern in Haskell. It's an example of how programs that work with codata need to keep being productive and you get something similar with the -|Skip|- in Stream Fusion . First time I write the code I failed to do this and kept wondering why my infinite searches would just hang, despite my great efforts to make it as lazy as possible.)A quick test of an infinite search: finding Pythagorean triples by brute force. We give each integer as cost one more than the previous one:I guess this is actually Dijkstra's algorithm, but on a tree rather than a general graph.If you run test1 you'll notice how the output is noisy because of all thoseterms. 
If you'e not worried about non-termination you could just throw out redundant output like so:Might as well convert weights to probabilities while we're at it:This version should be a lot less chatty:Now that searching works I can turn to an application - a more sophisticated example of what I briefly looked at previously ), parsing with ambiguous grammars. So let me first build types to represent parsed sentences in a toy grammar:The following two are noun phrase and prepositional phrase:And entire sentences:We want to be able to print parsed sentences so here's a quick 'unparse' type class to recover the underlying string:Now I'm going to approach the problem of parsing ambiguous sentences in two ways. One will be efficient, and one will be inefficient but represent the 'ground truth' against which we'll compare. (This reflects standard practice in graphics publications where authors compare their fancy new algorithm with an ultra-slow but reliable Monte Carlo ray-tracer.)I'm going to assume that sentences in my language are described by a "context free" probability distribution so that a noun phrase, say, has a fixed probability of being made up of each possible combination of constituents regardless of the context in which it appears.I need an English word for something that takes a grammar and does something with it but I'm at a loss to think of an example. I'll use 'transducer', even though I don't think that's right.So a transducer is built from either terminal nodes of one character, or it's one of a choice of transducers, each with a given probability:And here's our toy grammar. It's nothing like an actual natural language because real grammars take a long time to get right. Note I'm just giving the first couple of type signatures to show that the grammar uses only theandinterfaces:So, for example, a noun has a 50% chance of being the stringand a 50% chance of being the stringSome of our "parts of speech" allow sequences of terms. 
We need some kind of probabilistic model of how many such terms we can expect. I'm going to assume the probability falls off exponentially with the number of items:I also have a convenience function for sequences of length at least 1:And now the rest of the grammar:We're going to use this grammar with two instances of type Transducer. The first will use the rules of the grammar as production rules to generate random sentences. The second will parse strings using the grammar. So we get two uses from one 'transducer'. This is pretty powerful: we have described the grammar in an abstract way that doesn't asuume any particular use for it.Let's implement the generation first:We can test it by generating a bunch of random sentences:We can now use generate-and-test to estimate what proportion of randomly generated sentences match a given sentence:On the other hand we can build a parser, based on Hutton's , just like in my previous post except using this new tree search monad:And now we can try running both methods on the same string:You should see the parsings from countResults in roughly the same proportion as the relative probabilities given by findParse. Remember that the relative probability of a given parsing is the lastterm before that parsing. Try playing with, the number of Monte Carlo runs and the seed. Remember that there is going to be some variation in the randomised algorithm, especially with hard to parse strings, but raising the number of runs will eventually give reasonable numbers. Of course ultimately we don't care about the Monte Carlo method so it's allowed to be slow.Anyway, none of this is a new algorithm. You can find similar things in papers such as Probabilistic tree transducers and A Generalization of Dijkstra's Algorithm . But what is cool is how easily Haskell allows us to decouple the tree building part from the searching part. (And of course the tree is never fully built, it's built and destroyed lazily as needed.) 
All of the published algorithms have the parsing and searching hopelessly interleaved so it's hard to see what exactly is going on. Here the search algorithm doesn't need to know anything about grammars, or even that it is searching for parsings. Semiring Parsing is also easy to implement this way. BTW If you think my "ab" language is a bit too contrived, check out the last picture here for an example of some natural language that is in a similar spirit :-)