#ifndef __SYMBOL_H__ #define __SYMBOL_H__ /* * "Copyright (c) 2014 by Fritz Sieker." * * Permission to use, copy, modify, and distribute this software and its * documentation for any purpose, without fee, and without written * agreement is hereby granted, provided that the above copyright notice * and the following two paragraphs appear in all copies of this software. * * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE AUTHOR * HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" * BASIS, AND THE AUTHOR NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, * UPDATES, ENHANCEMENTS, OR MODIFICATIONS." */ /** @file symbol.h * @brief Defines the interface to symbol.c functions (do not modify) * @details This file defines the interface to a C file symbol.c that * you will complete. *
* In this implementation, you will learn about dynamic memory management using
* malloc/calloc/free. You will also learn about function pointers (callback
* functions).
*/
/** This defines an opaque type. The actual contents of the structure are hidden
* in the implementation and only a pointer to this structure is used
* externally to this file. A pointer to an opaque structure is sometimes
* referred to as a handle.
*/
typedef struct sym_table sym_table_t;
/** The symbol_find methods return a pointer to this data structure. It
represents a (name, address) pair in the symbol table.
*/
typedef struct symbol {
char* name; /**< the name of the symbol */
int addr; /**< symbol's address in the LC3 memory */
} symbol_t;
/** Defines the signature of a callback function (also known as a function
* pointer). This is how languages such as Java and C++ do dynamic
* binding (i.e. figure out which function to call). Recall that in Java, the
* code obj.equals(object)
will call one of possibly many
* different methods depending on the actual type of obj
. This
* is because the method .equals() may be overridden.
*
* In the LC3, dynamic binding is based on the JSRR opcode. With this * opcode, the address of the routine to call is stored in a register and can * be changed at runtime. Compare this to a JSR nameOfRoutine opcode * which specifies what routine to call from the label that follows it. Thus, * the address is fixed at assembly time. *
* This is used in the symbol_iterate
function.
* @param sym - pointer to a symbol
* @param data - a pointer to some data that the function might need.
*/
typedef void (*iterate_fnc_t)(symbol_t* sym, void* data);
/** Create a new symbol table and return a pointer to it. This function is a
* constructor for a symbol table and is automatically called when the program
* starts. In this function, you must dynamically (i.e., using
* malloc()/calloc()
) allocate space for the following:
*
*
sym_table_t
structure: it is important to note
* that this structure does not contain the actual data of the symbol
* table. It's a bookkeeping structure that contains the capacity of the
* hash table, a pointer to the actual hash table, and a pointer to the
* address table (refer to our drawing of the data structures). Hence,
* don't try to allocate space for more than one sym_table_t
* structure.node_t*
. The
* capacity of the hash table is given by the table_size
* argument which comes from the command-line argument that you supply to
* ./testSymbol
. You can think of each element of this array
* as being a pointer to the head of a linked list. Hence, you must ensure
* that all the pointers in the hash table are initialized to 0 (or NULL)
* since the table is initially empty. The hash_table
member
* of the sym_table_t
structure allocated previously must be
* set to point to the start of the hash table.char*
. The
* number of elements is fixed. It corresponds to the number of possible
* addresses in the LC-3. Since an LC-3 address can only be 16 bits, the
* number of possible addresses is 216. For your convenience, we
* have defined a constant in symbol.c
with this value
* (LC3_MEMORY_SIZE
). Each element in this array is a pointer
* to the start of a string. Hence, you must ensure that all the pointers
* in the table are initialized to 0 (or NULL) since the table is initially
* empty. The addr_table
member of the
* sym_table_t
structure allocated previously must be set to
* point to the start of the address table.table_size
in the size
member
* of the sym_table_t
structure allocated previously.
*
* @param table_size - The size of the hash table.
* @return A pointer to the sym_table_t
structure you
* allocated. This pointer will be passed to you in the other functions so that
* you can access the hash table and the address table.
*/
sym_table_t* symbol_init (int table_size);
/** Add a symbol to the symbol table. This function assumes that the name you
* are trying to add to the symbol table is not already associated with an
* existing symbol (you do not have to check for name duplicates in this
* function). You must initialize the fields of the structure appropriately and
* store it at the correct index in the hash table. The entry at a given index
* of the hash table is a pointer to the head of a linked list of nodes. The
* symbol just created must
* become the new head of the list. If you do not follow this
* requirement, you may not get full credit. You also need to make the
* corresponding entry in the address table point to the name of the symbol
* that was just added (each index in the addr_table
array
* represents an address). If an entry in the addr_table
array is
* already pointing to a name (this happens if two names are associated with
* the same address), then you must not modify the existing entry
* addr_table
array (but you should still add the symbol to the
* hash table).
*
* The general algorithm is as follows:
*
* symbol_hash()
function (defined in
* symbol.c
).index = hash-value % capacity-of-hash-table
.node_t
) and initialize its members
* appropriately. All members of the node should be initialized. The
* next
member is used to link to the next node in the linked
* list (this should be NULL for the last node in the list). The
* hash
member should contain the hash (not the index)
* calculated previously. This member will be useful in the
* symbol_search()
function. The symbol
member
* is a structure that contains the (name, addr) pair. The name must be
* stored as-is, i.e., don't change uppercase or lowercase letters. See
* the important note below about the name
parameter.addu
command, but
* you will not be able to see if you inserted a node until you implement the
* symbol_find_by_addr()
and symbol_iterate()
* functions.
*
* A hash table is designed to spread out the entries, often sacrificing space
* for speed. To thoroughly test your function, you will need to force multiple
* names to occupy the same index in the hash table (this will be tested in
* part A, it is not the same as duplicate symbol detection). Think about the
* pigeon hole principle from CS161. Your code will be tested with a variety of
* different values.
*
* Important:
*
* const char
* *name
. This is the name argument that the user enters for the
* addu
command. More specifically, it is a pointer to that
* string in memory. You cannot assume that the string is going to remain
* unchanged after your function returns. The caller may choose to do
* anything with the memory where this pointer is pointing to. Hence, you
* must create a copy of the name argument. That way, you can guarantee
* that the copy will remain unchanged for the duration of the program. To
* do this, you can use the strdup()
function. This function
* will dynamically allocate space for a string copy and will copy the
* passed string. Because this is a dynamic allocation, you will later need
* to free it (in part B). If you don't create a copy of the string,
* you may see repeated symbols in the program output. Type man
* strdup
in the terminal to see the documentation for this
* function.symbol_iterate()
function generates
* when using the list
command may be different from what the
* autograder expects and you may not get full credit.addr_table
. Use the label
command to test
* this function.
*
* @param symTab - Pointer to a sym_table_t structure so that you can access
* the hash table and the address table.
* @param addr - An address.
* @return The label at that address or NULL if no symbol is
* associated with the address.
*/
char* symbol_find_by_addr (sym_table_t* symTab, int addr);
/** Visit all the symbols in the hash table and call a function for each one.
* The interesting thing is that the function to be called for each node is
* called through a function pointer given to you as the
* fnc
parameter. Calling a function through a function pointer is
* straightforward. The line of code you will use looks like this (replace the
* part in blue appropriately):
*
* (*fnc)(memory address
* of symbol, data);
*
* Why is this useful? Because whoever calls your symbol_iterate()
* function can specify what to do for each node by defining a separate
* function and passing you a pointer to it. It makes your program more
* flexible. In this assignment, we use symbol_iterate()
for two
* commands: list
and count
. The list
* command passes a printing function to your function so that each node is
* shown on the screen (but you don't need to know that, you just need to call
* the function through the function pointer). The count
command
* passes a counting function to your function (but again, you don't really
* need to know that). The data
argument is passed to you as an
* argument in the symbol_iterate()
function. You don't need to do
* anything with this parameter. Just pass it as-is to the function that you're
* calling using the function pointer. Function pointers are used in
* event-driven programming where callback functions are needed to process
* events. This is a popular paradigm in computer networking.
*
* It is very important
* that you visit the nodes in a specific order. You must have a loop that
* visits each entry in the hash table starting at index 0. Every time it finds
* a non-NULL pointer, it must traverse the corresponding linked list. The
* following figure shows a hash table of size 6. The order of traversal is
* shown with numbers inside each node:
*
*
*
* @param symTab - Pointer to a sym_table_t structure so that you can access
* the hash table and the address table.
* @param fnc - Pointer to the function to be called on every symbol.
* @param data - Any additional information to be passed on to
* fnc
directly. The called function will cast this to whatever
* type it expects.
*/
void symbol_iterate (sym_table_t* symTab, iterate_fnc_t fnc, void* data);
/** This function is a useful support function for the
* symbol_add()
and symbol_find_by_name()
functions.
* It searches for a node in the hash table whose symbol's name matches the
* name passed to the function. The search must be performed in a case
* insensitive manner.
*
* The general algorithm to search for a symbol is:
*
* symbol_hash()
function (defined in
* symbol.c
).index = hash-value % capacity-of-hash-table
.symbol_hash()
function is case insensitive,
* meaning, for example, that the hash values of ab and AB are the same.
* However, you must still compare strings in a case-insensitive manner
* (hint: do a Google search to look into the C strcasecmp()
* function).NULL
* otherwiseptrToHash
and ptrToIndex
pointers (even if the
* node was not found). You can test your implementation by using the
* search
command.
*
* @param symTab - Pointer to a sym_table_t structure so that you can access
* the hash table and the address table.
* @param name - The name of the symbol.
* @param ptrToHash - This function must obtain the hash value of the
* name
parameter and return it through this pointer (even if the
* node was not found).
* @param ptrToIndex - This function must obtain the index in which the symbol
* being searched for is expected to be in the hash table and must return it
* through this pointer (even if the node was not found).
* @return A pointer to the node that contains the symbol being searched for
* or NULL if the symbol was not found.
*/
struct node* symbol_search (sym_table_t* symTab, const char* name, int* ptrToHash, int* ptrToIndex);
/** Add a symbol to the symbol table if there is no other symbol with the same
* name already in the symbol table. Otherwise, it returns an error code. You
* should use the symbol_search()
function to check if there is a
* name duplicate (because this check must be case insensitive). If the name is
* not a duplicate, you should use the symbol_add_unique()
* function to add the symbol to the table (because the insertion must be done
* in the hash table as well as the address table in the same manner as in the
* symbol_add_unique()
function). You may test your implementation
* using the add
command.
*
* @param symTab - Pointer to a sym_table_t structure so that you can access
* the hash table and the address table.
* @param name - The name of the symbol.
* @param addr - The address of the symbol.
* @return 1 if the symbol is not a name duplicate and was added, 0 if the
* symbol is a name duplicate.
*/
int symbol_add (sym_table_t* symTab, const char* name, int addr);
/** Find a symbol by its name. The search must be case insensitive. You should
* use the symbol_search()
function to do the heavy work.
*
* @param symTab - Pointer to a sym_table_t structure so that you can access
* the hash table and the address table.
* @param name - The name of the symbol.
* @return A pointer to the symbol or NULL if the symbol was not found.
*/
symbol_t* symbol_find_by_name (sym_table_t* symTab, const char* name);
/** Remove all the symbols from the symbol table. This involves:
*
* reset
command. The goal is for the
* symbol table to be reset to the initial state so that new symbols can be
* added immediately after resetting. You should also use Valgrind to make sure
* that you don't have memory errors (like invalid reads/writes or
* uninitialized values). At this point, it may be too early to check for
* memory leaks because by the time this function returns, there will still be
* some blocks in the heap. You can wait until you implement
* symbol_term()
to check for memory leaks. Refer to the main
* instructions for details on how to run Valgrind.
*
* @param symTab - Pointer to a sym_table_t structure so that you can access
* the hash table and the address table.
*/
void symbol_reset (sym_table_t* symTab);
/** Remove all symbols from the symbol table and free all dynamically allocated
* memory. This function is a destructor for a symbol table. Consider using the
* symbol_reset()
function and then free the remaining memory.
* There must not be any memory leaks. Test it with Valgrind. When you run it
* with the \-\-leak\-check=yes
option, you should see the
* following two lines:
*
* All heap blocks were freed -- no leaks are possible
ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
*
* Refer to the main instructions for details on how to run Valgrind.
*
* This function is called automatically when the test driver terminates by
* typing the quit
command.After executing this function, the
* opaque pointer to the symbol table is no longer valid.
*
* @param symTab - Pointer to a sym_table_t structure so that you can access
* the hash table and the address table.
*/
void symbol_term(sym_table_t* symTab);
#endif /* __SYMBOL_H__ */