#ifndef __TOKENS_H__
#define __TOKENS_H__

/*
 * "Copyright (c) 2012 by Fritz Sieker."
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose, without fee, and without written
 * agreement is hereby granted, provided that the above copyright notice
 * and the following two paragraphs appear in all copies of this software,
 * that the files COPYING and NO_WARRANTY are included verbatim with
 * any distribution, and that the contents of the file README are included
 * verbatim as part of a file named README with any distribution.
 *
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE AUTHOR
 * HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
 * BASIS, AND THE AUTHOR NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS."
 */

/** @file tokens.h
 *  @brief interface to functions to tokenize a line of LC3 source code
 *  @details One of the first steps in converting a "high" level language to
 *  some other form is to tokenize the input stream. This process is also
 *  known as lexical analysis. It involves breaking the input into a list
 *  of "words" that are significant in terms of the syntax of the language.
 *  <p>
 *  For a language such as C, this means identifying keywords, numbers,
 *  user-defined names, and all the punctuation that is used (e.g. (){}+*-/,; ...).
 *  In languages like C, there can be multiple "statements" on a single line and
 *  "statements" can span multiple lines.
 *  <p>
 *  The LC3 assembly language is much simpler. Every statement is contained
 *  on a single line of the file. The only punctuation used is the comma used
 *  to separate multiple operands. The most complex statement is of the form:
 *  <pre><code>
 *    label opcode operand1, operand2, operand3
 *  </code></pre>
 *  This code is provided to reduce the work in completing your assembler
 *  project. For more details, see
 *  <a href="http://en.wikipedia.org/wiki/Lexical_analysis">this</a> description
 *  from Wikipedia. If you take a compiler class like cs453, you will learn
 *  a lot more about lexical analysis, and how to use tools that will
 *  generate the code for you from a language description in a text file.
 *  <p>
 *  @author Fritz Sieker
 */

/** Maximum length of a source line.
 *  NOTE(review): whether this count includes the trailing newline and/or the
 *  terminating NUL is not stated in this header - confirm against the
 *  implementation before sizing buffers with it.
 */
#define MAX_LINE_LENGTH 8180

/** Maximum number of tokens in one LC3 source line, plus a few more to handle
 *  bad syntax. The most complex well-formed statement,
 *  <code>label opcode operand1, operand2, operand3</code>, produces 7 tokens
 *  because the separating commas are returned as tokens too; the rest is
 *  headroom for malformed lines.
 */
#define MAX_TOKENS 10

/** Initialize the module.
 *  NOTE(review): presumably must be called before any other function declared
 *  in this header - confirm against the implementation.
 */
void tokens_init (void);

/** Convert a single line of LC3 source code into a list of tokens and return
 *  the first one. Subsequent tokens are retrieved using
 *  <code>next_token()</code>. The function recognizes the semi-colon as the
 *  start of an LC3 end-of-line comment and discards the entire comment. Tokens
 *  are separated by whitespace or commas. The commas themselves are returned
 *  as tokens in the list.
 *  <p>
 *  For quoted strings used by the .STRINGZ directive, the returned token
 *  preserves the opening/closing quote marks, but converts all internal escape
 *  sequences into their actual character values.
 *  @param line - the source code line.
 *  NOTE(review): the parameter is not const-qualified; whether the tokenizer
 *  modifies the caller's buffer cannot be determined from this header.
 *  @return the first token of the line, or NULL (presumably when the line
 *  contains no tokens - confirm). The value returned is a static variable
 *  whose contents are modified on each call. Therefore, the caller must copy
 *  any values that need to be preserved from call to call.
 */
char* tokenize_line (char* line);

/** Return the next token from the list generated by the most recent call to
 *  <code>tokenize_line()</code>.
 *  @return the next token or NULL if there are no more tokens.
 *  NOTE(review): the returned pointer presumably refers to storage owned by
 *  this module, as with <code>tokenize_line()</code> - copy the value if it
 *  must outlive the next tokenize call; confirm against the implementation.
 */
char *next_token (void);

/** Print the tokens of the line. This is intended for debugging only.
 *  NOTE(review): the output stream and format are not specified in this
 *  header - check the implementation before relying on them.
 */
void print_tokens (void);

/** Return the number of tokens in the current line.
 *  NOTE(review): since commas are returned as tokens by
 *  <code>tokenize_line()</code>, they are presumably included in this
 *  count - confirm against the implementation.
 *  @return the token count for the current line
 */
int token_count (void);

/** Get a specified token from the current line by position.
 *  @param index - which token to return.
 *  NOTE(review): presumably 0-based - confirm against the implementation.
 *  @return - the token at the index or NULL (presumably when the index does
 *  not refer to an existing token - confirm).
 */
char* get_token (int index);

/** Terminate the module.
 *  NOTE(review): presumably the counterpart of <code>tokens_init()</code>,
 *  after which tokens returned earlier should no longer be used - confirm
 *  against the implementation.
 */
void tokens_term (void);

#endif