-- LexerTutorial.hs
--
-- An example of how to tokenize an input stream.
-- Tokenizes stdin and prints the list of tokens found to stdout.
-- Handles a subset of the tokens needed to write a lexer for MeggyJava.
--
-- compilation:
--   ghc --make -O2 LexerTutorial.hs -o lexer
--
-- command-line usage:
--   ./lexer < infile
--   ./lexer < infile > outfile
--
-- interactive usage in ghc REPL:
--   ghci
--   Prelude> :l Main1.hs
--   *Main> lexer "hello there ; . , * {} (bye)"
--
module Main where

-- needed for isAlpha, isSpace, isDigit
import Data.Char

-- | Entry point: read all of stdin, run the lexer over it, and print
-- the resulting token list to stdout.
main :: IO ()
main = do
  file_as_str <- getContents
  let output = lexer file_as_str
  print output

-- | Token datatype.
-- Much like an enumerated type except the TokenInt and TokenId
-- constructors also carry a value.  Deriving Show so tokens can
-- be printed.
data Token
  = TokenImportkw       -- ^ the keyword "import"
  | TokenPublickw       -- ^ the keyword "public"
  | TokenInt Int        -- ^ an integer literal and its value
  | TokenId String      -- ^ an identifier and its spelling
  | TokenLBrace
  | TokenRBrace
  | TokenLParen
  | TokenRParen
  | TokenSemi
  | TokenPeriod
  | UnknownChar Char    -- ^ any character the lexer does not recognize
  deriving Show

-----------------------------------------------------------------
-- Function that performs lexical analysis.
-- The following line declares the type of the function: the input
-- parameter is a String and the function returns a list of Tokens.
lexer :: String -> [Token]
-- Each equation below uses pattern matching on the front of the input
-- list.  The ":" operator adds an element to the beginning of a list
-- when on the right-hand side ("TokenLBrace : lexer cs") and pattern
-- matches the first item of a list on the left-hand side ("('{':cs)").
-- See http://www.haskell.org/haskellwiki/How_to_work_on_lists.
lexer [] = []
lexer ('{':cs) = TokenLBrace : lexer cs
lexer ('}':cs) = TokenRBrace : lexer cs
lexer ('(':cs) = TokenLParen : lexer cs
lexer (')':cs) = TokenRParen : lexer cs
lexer (';':cs) = TokenSemi   : lexer cs
lexer ('.':cs) = TokenPeriod : lexer cs
-- The equation below binds the variables c and cs by pattern matching,
-- then uses guards to choose what recursion to perform.  Guards are
-- tried in order; only the first one that evaluates to True fires.
-- See http://www.haskell.org/haskellwiki/Case#Guards
-- and http://stackoverflow.com/questions/4156727/what-is-the-difference-between-pattern-matching-and-guards.
lexer (c:cs)
  | isSpace c = lexCurso                      -- drop whitespace
  | isAlpha c = lexId (c:cs)                  -- identifier or keyword
  | isDigit c = lexNum (c:cs)                 -- integer literal
  | otherwise = UnknownChar c : lexer cs      -- anything else
  where lexCurso = lexer cs

-- | Lex an integer literal.
-- The caller guarantees the input starts with a digit, so 'span isDigit'
-- yields a non-empty digit string and 'read' cannot fail here.
-- Uses a where clause.
lexNum :: String -> [Token]
lexNum cs = TokenInt (read num) : lexer rest
  where (num, rest) = span isDigit cs

-- | Lex an identifier or keyword.
-- The span function is built in to the Prelude.  Look it up.
-- BUG FIX: the original only recognized "import", so TokenPublickw was
-- declared but never produced; "public" is now recognized as a keyword.
lexId :: String -> [Token]
lexId cs
  | s1 == "import" = TokenImportkw : lexer s2
  | s1 == "public" = TokenPublickw : lexer s2
  | otherwise      = TokenId s1    : lexer s2
  where (s1, s2) = span isAlpha cs
-- Equivalent implementation using a case expression, which also
-- uses pattern matching:
--   case span isAlpha cs of
--     ("import",rest) -> TokenImportkw : lexer rest
--     ("public",rest) -> TokenPublickw : lexer rest
--     (id,rest)       -> TokenId id : lexer rest