#ifndef __TOKENS_H__
#define __TOKENS_H__
/* NOTE(review): identifiers containing a double underscore are reserved for
 * the implementation; the guard name is kept unchanged for compatibility. */

/*
 * "Copyright (c) 2012 by Fritz Sieker."
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose, without fee, and without written
 * agreement is hereby granted, provided that the above copyright notice
 * and the following two paragraphs appear in all copies of this software,
 * that the files COPYING and NO_WARRANTY are included verbatim with
 * any distribution, and that the contents of the file README are included
 * verbatim as part of a file named README with any distribution.
 *
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE AUTHOR
 * HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
 * BASIS, AND THE AUTHOR NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS."
 */

/** @file tokens.h
 *  @brief interface to functions to tokenize a line of LC3 source code
 *  @details One of the first steps in converting a "high" level language to
 *  some other form is to tokenize the input stream. This process is also
 *  known as lexical analysis. It involves breaking the input into a list
 *  of "words" that are significant in terms of the syntax of the language.
 *
 *  For a language such as C, this means identifying keywords, numbers, user
 *  defined names, and all the punctuation that is used (e.g. (){}+*-/,; ...).
 *  In languages like C, there can be multiple "statements" on a single line
 *  and "statements" can span multiple lines.
 *
 *  The LC3 assembly language is much simpler. Every statement is contained
 *  on a single line of the file. The only punctuation used is the comma used
 *  to separate multiple operands. The most complex statement is of the form:
 *
 *  @verbatim
 *     label opcode operand1, operand2, operand3
 *  @endverbatim
 *
 *  This code is provided to reduce the work in completing your assembler
 *  project. For more details, see the description of lexical analysis on
 *  Wikipedia. If you take a compiler class like cs453, you will learn
 *  a lot more about lexical analysis, and how to use tools that will
 *  generate the code for you from a language description in a text file.
 *
 *  @author Fritz Sieker
 */

/** Maximum length of source line */
#define MAX_LINE_LENGTH 8180

/** Max token in LC3 line, plus a few more to handle bad syntax */
#define MAX_TOKENS 10

/** Initialize the module */
void tokens_init(void);

/** Convert a single line of LC3 source code into a list of tokens and return
 *  the first one. Subsequent tokens are retrieved using
 *  <code>next_token()</code>. The function recognizes the semi-colon as the
 *  LC3 end of line comment and discards all of the comment. Tokens are
 *  separated by whitespace or commas. The commas are returned as part of the
 *  list.
 *  @param line - the source code line
 *  @return the first token of the line, or NULL if the line yields no token.
 *  The value returned is a static variable whose contents are modified on
 *  each call. Therefore, the caller must copy values that need to be
 *  preserved from call to call.
 *  For quoted strings used by the .STRINGZ directive, the returned token
 *  preserves the opening/closing quote marks, but converts all internal
 *  escape sequences into their actual character value.
 */
char *tokenize_line(char *line);

/** Return the next token for the list generated by tokenize_line()
 *  @return the next token or NULL if there are no more tokens
 */
char *next_token(void);

/** Print the tokens of the line. This is for debugging purposes. */
void print_tokens(void);

/** Return the number of tokens in the current line */
int token_count(void);

/** Get a specified token from the line
 *  @param index - which token to return
 *  @return - the token at the index or NULL
 */
char *get_token(int index);

/** Terminate the module */
void tokens_term(void);

#endif