#ifndef __ASSEMBLER_H__
#define __ASSEMBLER_H__

/*
 * "Copyright (c) 2014 by Fritz Sieker."
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose, without fee, and without written
 * agreement is hereby granted, provided that the above copyright notice
 * and the following two paragraphs appear in all copies of this software.
 *
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE AUTHOR
 * HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
 * BASIS, AND THE AUTHOR NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS."
 */

/** @file assembler.h
 * @brief Defines the interface to assembler.c functions (do not modify)
 * @details This file defines the interface to a C file assembler.c that
 * you will complete. This is the main portion of an assembler for LC3.
 *

 * This is a substantial assignment. One way to attack larger projects is to
 * break the problem into pieces and code those pieces individually. You
 * have already done this for the files field.c, util.c and symbol.c.
 * Similarly, instead of writing large functions, decompose them into a
 * series of calls to shorter functions which do required subtasks. The
 * decomposition may be continued until each low level function is easy to
 * write correctly. If you find any one function getting "too" large, think
 * about what the code does and decompose it into several smaller functions.
 * In fact, if you can give the task a good symbolic name, you can postpone
 * writing it until later. This is because the name implies "what" the
 * function does. "How" it is done will be determined when that function is
 * coded. It is perfectly fine to have methods that are only called once from
 * the code. The idea is that each of these functions performs a
 * straightforward subtask. And it is perfectly fine to have functions that
 * are only a few lines of code.
 *

 * Most of the methods documented here are suggestions for abstractions that
 * perform small, useful tasks. They are for your convenience. By creating
 * more abstractions, you will find the code easier to write and debug.
 * Only the functions with the prefix asm_ are called from the main program.
 * The other (helper) functions should be static, but are declared here for
 * documentation purposes.
 *

* @author Fritz Sieker */ #include "lc3.h" #include "symbol.h" #ifndef LC3AS_VAR #define LC3AS_VAR extern #endif /** Error messages passed to function asm_error() */ #define ERR_OPEN_READ "could not open '%s' for reading." #define ERR_OPEN_WRITE "could not open '%s' for writing." #define ERR_LINE_TOO_LONG "source line too long (max is %d)" #define ERR_NO_ORIG "no .ORIG directive found" #define ERR_NO_END "no .END directive found" #define ERR_ORIG_NOT_1ST "instruction(s) appear before .ORIG" #define ERR_END_NOT_LAST "instruction(s) appear after .END" #define ERR_MISSING_ORIG ".ORIG not found" #define ERR_MISSING_END ".END not found" #define ERR_EXPECTED_COMMA "expected comma, got '%s'" #define ERR_EXPECTED_REG "expected register (R0-R7), got '%s'" #define ERR_EXPECT_REG_IMM "expected register or immediate, got '%s'" #define ERR_BAD_LABEL "label '%s' contains illegal characters" #define ERR_MISSING_OP "expected LC3 op, got '%s'" #define ERR_MISSING_OPERAND "too few operand(s)" #define ERR_EXTRA_OPERAND "extra operand(s) '%s'" #define ERR_DUPLICATE_LABEL "label '%s' previosly defined" #define ERR_MISSING_LABEL "label '%s' never defined" #define ERR_BAD_PCOFFSET "PCoffset to '%s' out of range" #define ERR_BAD_IMM "immediate '%s' (bad format)" #define ERR_IMM_TOO_BIG "immediate '%s' out of range" #define ERR_EXPECTED_STR "expected quoted string, got '%s'" #define ERR_BAD_STR "unterminated string '%s'" /** A global variable defining the line in the source file */ LC3AS_VAR int srcLineNum; /** A global variable defining the LC3 address of the current instruction */ LC3AS_VAR int currAddr; /** A global variable defining the number of errors found */ LC3AS_VAR int numErrors; /** Typedef of structure type */ typedef struct line_info line_info_t; /** Structure containing all the values that might be found in a source line. 
* The contents of this structure are set during pass_one() and
 * used to build machineCode during pass_two
 */
struct line_info {
    line_info_t *next;        /**< Allow for linked list                  */
    int          lineNum;     /**< Line number in source code             */
    int          address;     /**< LC3 address of instruction             */
    int          machineCode; /**< The 16 bit LC3 instruction             */
    opcode_t     opcode;      /**< opcode of instruction                  */
    int          form;        /**< which form of instruction (ADD/ADDI)   */
    int          reg1;        /**< DR or SR, if present                   */
    int          reg2;        /**< SR1 or BaseR, if present               */
    int          reg3;        /**< SR2, if present                        */
    int          immediate;   /**< Immediate value if present             */
    char        *reference;   /**< Label referenced by instruction, if any */
};

/** Print an error message. This function takes a minimum of one parameter
 * and is used exactly like the printf() function: the first parameter is a
 * formatting string and the remaining parameters (if any) are the actual
 * values to be printed. It must be used for reporting all errors.
 * Do not modify.
 * @param msg - the formatting string
 */
void asm_error(char *msg, ...);

/** Do whatever initialization is necessary for the assembler */
void asm_init(void);

/** Initialize all the fields of the structure to default values.
 * The lineNum and address fields are initialized on return. All other fields
 * are set to "default" values.
 * Do not modify.
 * @param info - pointer to information about a source line
 */
void asm_init_line_info(line_info_t *info);

/** This function performs the processing required in the first pass. At a
 * minimum, it must check the syntax of each instruction and create the symbol
 * table. For this assignment, it will also create a data structure for use
 * by the second pass. The flow of this function is:
 *

 *  1. open the source file, reporting an error on failure (think about your
 *     convenience functions)
 *  2. read the lines one at a time using fgets(). The function fgets()
 *     takes advantage of C's ability to return multiple values in a single
 *     call by using pointers. The values it reads are stored in the
 *     character buffer passed as the first argument. Success/failure is
 *     indicated by the actual return value.
 *  3. convert the line to a list of tokens
 *  4. if there is an opcode on the line, then
 *     1. allocate a new line_info_t, store it in the global variable
 *        currInfo and initialize it
 *     2. convert the tokens to values and set the appropriate fields of
 *        currInfo
 *     3. add it to the linked list defined by infoHead and infoTail
 *     4. update the current address
 *  5. If there were no errors, write the symbol table file using
 *     lc3_write_sym_tab().
 *
* @param asm_file_name - name of the file to assemble * @param sym_file_name - name of the symbol table file */ void asm_pass_one (char* asm_file_name, char* sym_file_name); /** This function generates the object file. It is only called if no errors were * found during asm_pass_one(). The basic structure of this code * is to loop over the data structure created in asm_pass_one(), * generate object code (16 bit LC3 instructions) and write it to the object * file. * @param obj_file_name - name of the object file for this source code */ void asm_pass_two(char* obj_file_name); /** A function to print the infomation extracted from a source line. This is * used for debugging. * Do not modify. * @param info - pointer to informaton about a source line */ void asm_print_line_info (line_info_t* info); /** Cleanup everything used by the assembler */ void asm_term (void); /** A function to check if the token is a label. In LC3 assembly language, * the optional label may preceed the opcode. Therefore, if the token is * not a valid opcode, assume it is label. If it is a valid label, * add it to the symbol table. Report any errors found using * asm_error(). * @param token - the token to consider * @return If the token is a label, then return the next token. * Otherwise, return the parameter. */ char* check_for_label (char* token); /** A function to check the syntax of a source line. At the conclusion of this * function, the appropriate fields of the global currInfo are * initialized. For example, if the current instruction is * NOT R4,R3 then at the conclusion of the function, * the fields opcode, reg1, reg2 would contain the values * OP_NOT, 4, 3 respectively. The basic flow of the function is: *
 *  1. determine if the first token is a label
 *  2. convert the first/next token to an opcode and store it into your
 *     data structure. It is legal for an LC3 statement to have only a
 *     label. This will be the case if the value returned by
 *     check_for_label() is NULL.
 *  3. determine the list of operands corresponding to this opcode. How can
 *     you get this from the lc3 module?
 *  4. if this opcode has operands associated with it then call
 *     scan_operands()
 *  5. make sure there are no extra operands
 *
* @param token - the first token on the line. This could be a label or an * operator (e.g. ADD or .FILL). */ void check_line_syntax (char *token); /** A second pass function to take one field from the currInfo * structure and place it in the machineCode field. For example, if * the operand is FMT_R1, then the field reg1 is put * in bits 11 .. 9. The flow of this function mirrors the code of pass one * function. * get_operand(). * @param operand - the type of operand */ void encode_operand (operand_t operand); /** This second pass function is used to convert the reference into a PC offset. * There are several errors that may occur. The reference may not occur in * the symbol table, or the offset may be out of range. If successful, this * function puts the PC offset in the machineCode field of * currInfo. * @param width - the number of bits that hold the PC offset */ void encode_PC_offset_or_error (int width); /** A convenience function to make sure the next token is a comma and report * an error if it is not. */ void get_comma_or_error (void); /** A convenience function to convert an token to an immediate value. Used for * the imm5/offset6/trapvect8/.ORIG values. The value is obtained by calling a * function provided for you in the lc3 module. If * the value is not in the correct format, or out of range, report an error * using asm_error(). If it is good, store it in the * immediate field of currInfo * @param token - the string to be converted to an immediate * @param width - how many bits are used to store the value * @param isSigned - specifies if number is signed or unsigned */ void get_immediate_or_error (char* token, int width, int isSigned); /** A convenience function to get the label reference used in the * BR/LD/LDI/LEA/ST/STI/JSR instructions. * The code should make sure it is a * valid label. If it is valid, store it in the reference field of * currInfo. If is not a valid label, report an error. 
You should * understand why this routine may not be able to directly calculate the * PCoffset. * @param token - the reference to check */ void get_PC_offset_or_error (char* token); /** A convenience function to convert the token to a value and store it in * the currInfo data structure. For example, if the operand is * FMT_R1 and the token is R4, then the field * reg1 is set to 4. * @param operand - the type of operand that is expected * @param token - the token to be converted */ void get_operand (operand_t operand, char* token); /** A function to convert a string to a register number and report an error * if the string does not represent an register. Use the function * asm_error() to report errors. * @param token - the string to convert to a register * @return the register number, or -1 on an error */ int get_reg_or_error (char* token); /** Open file for reading and report an error on failure. Use the C function * fopen() and report errors using asm_error(). * @param file_name - name of file to open * @return the file or NULL on error */ FILE *open_read_or_error (char* file_name); /** Open file for writing and report an error on failure. Use the C function * fopen() and report errors using asm_error(). * @param file_name - name of file to open * @return the file or NULL on error */ FILE *open_write_or_error (char* file_name); /** A convenience function to scan all the operands of an LC3 instruction. * The basic flow of this function is: *
 *  1. initialize a count of the number of operands expected
 *  2. initialize a count of the number of errors found before this
 *     function was called.
 *  3. loop over all the possible operand types. If the operand type is
 *     used by this LC3 instruction, do the following (NOTE: the list of
 *     sub-steps is missing from this copy of the documentation)
 *
* @param operands - a "list" of the operands for the current LC3 instruction * encoded as individual bits in an integer. */ void scan_operands (operands_t operands); /** This function is responsible for determing how much space and LC3 * instruction or pseudo-op will take and updating the global variable * currAddr defined in the file assembler.h. * Most LC3 instructions require a single word. However, there are several * exceptions: * */ void update_address (void); #endif