CS 200, Spring 2014: Assignment 4
Hash Tables and finally Document Retrieval

 

Due Wednesday 11/19/14 by Noon

 

This assignment has two purposes: to practice hash table implementation and to handle multi-word queries. For the hash table part, as with the BST, you should be able to limit your modifications, as much as possible, to the WebPages class.

 

New Data Structure: HashTable

 

Hash tables can provide close to constant time access to data retrieved by key. So for this assignment, you will be implementing the term index using a hash table. Essentially, you should be re-doing what you did to replace the ArrayList implementation with the BST, but now you are replacing the BST with the hash table.

 

Some specifics:

·                The hash table should be implemented with quadratic probing as the method for dealing with collisions (see the sketch after this list). This makes it distinct from the standard Java implementation (see http://download.oracle.com/javase/1.5.0/docs/api/java/util/Hashtable.html for the API description).

·                With quadratic probing, it can be difficult to tell whether every possible position has been checked for a value. So when searching for a key, in addition to the standard check for whether the next probed position holds a value or RESERVED, count the number of probes and terminate when that count reaches the size of the array (this prevents potential infinite loops).

·                You should use the hashCode() method that Java provides for String to determine the hash value for each term... with two caveats. hashCode is case-sensitive, so convert your string to lower case before generating the hash code. hashCode can return negative values, so take the absolute value.

·                Your hash table size will be read in as the first line in the input file. For grading it is critical that all students use the same array size and allow us to modify it as part of the test case. So the hash table constructor should take the size as an argument.

·                By its nature, the hash table will mangle the ordering of the data. So be it, that’s how we can tell your hash table is working properly.
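
To make the probing and hashing rules above concrete, here is a minimal sketch of a search helper. The field names table and RESERVED, and the Term accessor getWord(), are illustrative assumptions about your HashTable class, not requirements:

    // Minimal sketch of the probe loop (illustrative names: table, RESERVED, getWord()).
    private int findSlot(String word) {
        String key = word.toLowerCase();                     // hashCode is case-sensitive
        int home = Math.abs(key.hashCode()) % table.length;  // hashCode can be negative
        for (int k = 0; k < table.length; k++) {             // stop after table.length probes
            int pos = (home + k * k) % table.length;         // quadratic probing
            if (table[pos] == null) {
                return -1;                                   // empty slot: the key is not here
            }
            if (table[pos] != RESERVED && table[pos].getWord().equals(key)) {
                return pos;                                  // found the term
            }
            // RESERVED or a different key: keep probing
        }
        return -1;                                           // probed every position without a match
    }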

 

Your hash table should implement the following interface:

 public interface TermIndex {
     public void add(String filename, String newWord);
     public int size();
     public void delete(String word);
     public Term get(String word, Boolean printP);
 }

 

Note: these methods have the same signatures as the ones for the BST, so if you follow this interface, you should minimize the disruption to your existing code. In this case, the Boolean won’t be used in get (it was there to print the depth in the BST).

 

You may have other methods in the HashTable class as well. The add method will need to expand the size of the array and re-hash all the entries when the table approaches full. Use a threshold of 80% full as the trigger for re-building the hash table. The code should calculate the next size using the following equation: new_size = (2 * current_size) + 1
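
A minimal sketch of that growth check, assuming hypothetical fields count (the number of stored terms) and table, and a hypothetical helper reinsert() that re-hashes one entry into the new array:

    private void maybeRehash() {
        if (count >= 0.8 * table.length) {          // 80% full triggers a rebuild
            Term[] old = table;
            table = new Term[2 * old.length + 1];   // new_size = (2 * current_size) + 1
            count = 0;
            for (Term t : old) {
                if (t != null && t != RESERVED) {   // skip empty and RESERVED slots
                    reinsert(t);                    // re-hash the entry into the larger array
                }
            }
        }
    }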

 

You will also need an iterator to traverse the array for multiple reasons, e.g., printing, computing similarity (see below). The iterator should skip over array entries that do not contain data (i.e., null and “RESERVED” positions).
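
One way to write such an iterator, sketched as a fragment of the HashTable class (it assumes import java.util.Iterator and the same illustrative table and RESERVED fields as above):

    public Iterator<Term> iterator() {
        return new Iterator<Term>() {
            private int pos = advance(0);

            private int advance(int i) {            // skip slots that hold no real data
                while (i < table.length && (table[i] == null || table[i] == RESERVED)) {
                    i++;
                }
                return i;
            }
            public boolean hasNext() { return pos < table.length; }
            public Term next() {
                Term t = table[pos];
                pos = advance(pos + 1);
                return t;
            }
            public void remove() { throw new UnsupportedOperationException(); }
        };
    }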

 

 

pruneStopWords Returns

 

Your code needs to be able to delete words from the hash table as well as search and add, so you need to add pruneStopWords back into the WebPages class. Unlike before, it will not need to find the most frequently appearing words; now its argument will be a String for the word that needs to be removed. The input format is being changed to include a set of words to be removed from the TermIndex.

 

The stop words will immediately follow the files in the input file. As with the files, each word will be on a separate line, and the list will conclude with a flag: *STOPs*. Call the method pruneStopWords for each of the words found in the input file.
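
For example, the reading loop in PA4 might look roughly like this sketch (it assumes a Scanner named in over the input file and a WebPages object named pages; adapt it to your own reading code):

    String line = in.nextLine().trim();
    while (!line.equals("*STOPs*")) {     // the stop-word list ends with the flag
        pages.pruneStopWords(line);       // remove this word from the TermIndex
        line = in.nextLine().trim();
    }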

 

 

Multi-Word Document Retrieval!

 

At this stage, we have most pieces in place to retrieve documents using multi-word queries. Your code will compute a pairwise similarity metric between the query and each document in the collection. Similarity is based on a measure from Information Retrieval literature called Cosine Similarity. Warning: the procedure for computing this is rather complicated. Follow the instructions carefully.

 

For those mathematically inclined, imagine that each document and the query are represented as vectors, with each term as a dimension and the TFIDF value for the term/document pair as the value in that dimension. Similarity is the cosine of the angle between the query vector and each document vector; the document with the largest value is considered the most similar. The equation is:

 

Sim(d, q) = \frac{\sum_{i=1}^{t} w_{i,d}\, w_{i,q}}{\sqrt{\sum_{i=1}^{t} w_{i,d}^{2}}\; \sqrt{\sum_{i=1}^{t} w_{i,q}^{2}}}

 

Here d is a particular document (filename, webpage), q is the query, and t is the number of unique terms found across all documents. The weight for term i in document d (w_{i,d}) is what you computed in PA3 (tf x idf, i.e., tf_{i,d} \cdot \ln(n/df_i)); w_{i,q} is the corresponding weight for the query. n is the number of documents read in, and df_i is the number of documents that contain term i. Use Math.log for ln.

 

For those not mathematically inclined, follow the directions below carefully! For those who are interested, look up “cosine similarity” and read any of the tutorials on the subject available on the Web.

 

A query is a set of words. Each query will be on a separate line in the input file; multiple words may be on the same line.

 

Supportive Data Structures

 

For each query, the Sim equation is computed for each document. Then the document with the highest value is chosen as the best page for the query. To support this computation, create arrays (in mathematics, these would be “vectors”) for each of the components of the Sim equation:

·      docs: supports a mapping between the positions in the component arrays and which documents are being referenced [Hint: keep it sorted for easy access]

·      common: keeps the numerators

·      docSpecific: keeps the first summation in the denominators

 

The size of these arrays will be equal to the number of documents that have been read in. So the first position in docs will hold the name of the document that corresponds to the values in the first position in common and docSpecific. Initialize all positions in the weight arrays to 0.

 

The second summation in the denominator is a scalar (a single value) because there is only one query, so it can be accumulated in a variable of type double. In the procedure below, this variable is called queryWeights.
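
Put differently, for each query you might set up something like the following sketch (numDocs is assumed to be the number of documents read in; the names follow this handout):

    String[] docs        = new String[numDocs];  // docs[p]: filename for position p (kept sorted)
    double[] common      = new double[numDocs];  // numerators: sum over terms of w_{i,d} * w_{i,q}
    double[] docSpecific = new double[numDocs];  // first denominator sum: sum of w_{i,d} squared
    double queryWeights  = 0.0;                  // second denominator sum: sum of w_{i,q} squared
    // Java initializes double arrays to 0; docs still needs to be filled with the filenames.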

 

Algorithm for Computing Cosine Similarity

 

You will need to add a new method to WebPages, bestPages, which determines which page is most similar to a query (i.e., the document with the highest value of Sim(doc, query)) and also provides the cosine similarity value for it.

 

My algorithm for the similarity comparison is as follows (a partial sketch in Java appears after step 4):

1.     Create arrays to hold the various weights and summed weights as specified above.

2.     Traverse over the term index. For each term i:

a.     If the term is in the query, compute w_{i,q} as above (meaning use the w_{i,q} equation and square it) and add it to queryWeights [Note: queryWeights is only used to compute the second term in the denominator]

b.     For each document d that contains term i:

          i.     Compute the TFIDF value (w_{i,d}), square it, and add it to the value in docSpecific in the position for doc d [Hint: you should be able to use whichPages from PA3 or something very much like it to do this part]

          ii.     If the term is in both the query and document d, multiply w_{i,d} by w_{i,q} and add the product to the value in common in the position for document d

3.     For each document d:

a.     Compute sim(d,q) as: common[d] / (sqrt(docSpecific[d]) * sqrt(queryWeights))

b.     Keep track of which document has the highest sim value

4.     Return the document name and sim of the document that has the highest sim value. Note: in the case of ties, return the document name that is towards the end alphabetically (i.e., assuming you are traversing the docs in alphabetical order, return the last one encountered.)
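
As a partial sketch of steps 3 and 4 (it assumes the docs, common, docSpecific, and queryWeights structures above have already been filled in for the current query):

    String bestDoc = null;
    double bestSim = -1.0;
    for (int p = 0; p < docs.length; p++) {
        double sim = 0.0;
        if (docSpecific[p] > 0 && queryWeights > 0) {    // guard against dividing by zero
            sim = common[p] / (Math.sqrt(docSpecific[p]) * Math.sqrt(queryWeights));
        }
        if (sim >= bestSim) {        // >= lets the alphabetically later document win ties
            bestSim = sim;
            bestDoc = docs[p];
        }
    }
    // bestDoc and bestSim are what bestPages should report for this query.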

 

A few observations: Because similarity is relative, you really need at least three documents before documents might appear similar. If you have only two identical documents, then every position in their TFIDF vectors will be 0 (every term appears in both documents, so df_i = n and ln(n/df_i) = 0).

 

Hint: To simplify searching for terms, you should sort the words in the query and store them together in an array or ArrayList.
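
For instance, a small sketch (it assumes the query line has already been read into a String named queryLine, and that term holds the lower-cased term being checked):

    String[] query = queryLine.toLowerCase().split("\\s+");  // split the query into words
    java.util.Arrays.sort(query);                            // sort so terms can be found quickly
    boolean inQuery = java.util.Arrays.binarySearch(query, term) >= 0;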

 

Main Program: PA4

PA4 should work as before up to handling the queries. At that point, it should read queries one at a time and print the name of the document and its sim value as in the output example.
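
A rough sketch of that loop (it assumes a Scanner named in positioned just past the stop words, a WebPages object named pages, and whatever signature you chose for bestPages):

    while (in.hasNextLine()) {
        String queryLine = in.nextLine().trim();
        if (!queryLine.isEmpty()) {
            pages.bestPages(queryLine);   // report the best document's name and its sim value
        }                                 // (print here or inside bestPages, as you prefer)
    }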

 

Examples

 

Because of the complexity of the computation, I have greatly simplified the example. Test4 references the files simple4a.txt, simple4b.txt, and simple4c.txt. It should produce this output. For those who are Excel-savvy, this spreadsheet shows each of the calculations.

Other Hints

You need to carefully consider how to divide up the work and how to proceed. I recommend starting by making sure your TFIDF computation from PA3 is correct. You can implement and test the HashTable as a unit and then within PA3 before combining it with the enhancements here. Test the bestPages method on very simple cases first! Create some test cases before starting to code so that you think through what you need to program. Try creating your own examples using the spreadsheet as a template.