/* * Lab 4: Debugging using assertions * Buggy Implementation of P1 * Authors: Sudipto Ghosh and Asa Ben-Hur * Date: Feb 11, 2014 * Additional Code: Jordan Messec * Date: June 25, 2014 * */ import java.io.File; import java.io.FileNotFoundException; import java.util.Arrays; import java.util.Scanner; import java.util.regex.Pattern; public class Recit4 { /* * Argument: fileName. File consists of lines of Strings. * Returns: array of Strings, element i contains the last line. * Scans the file twice, first to get the number of lines, and then to * store the lines in an array. */ public String [] readTweets(String fileName) { // First pass: count the number of lines to get the size of the array Scanner scanner; int numLines = 0; File file = new File(fileName); try { scanner = new Scanner(file); while (scanner.hasNextLine()) { scanner.nextLine(); numLines++; } scanner.close(); } catch (FileNotFoundException e) { e.printStackTrace(); System.exit(0); } // allocate array of the appropriate size String [] result = new String[numLines-1]; // 2nd pass thru the file: read and process each line and fill the array // with the lines. try { scanner = new Scanner(file); int i = 0; // loop index for array while (scanner.hasNextLine()) { String line = scanner.nextLine(); i++; result[i] = line; } scanner.close(); } catch (FileNotFoundException e) { e.printStackTrace(); System.exit(0); } return result; } /* * Argument: fileName. File is newline delimited list of Strings. * Returns: array of Strings, each element is a stop word. * Scans the file twice, first to get the number of Strings, and then to * store the lines in an array. */ public String [] readStopWords(String fileName) { // First pass thru the file: count the number of lines to get the size of array Scanner scanner; int numLines = 0; File file = new File(fileName); try { scanner = new Scanner(file); while (scanner.hasNextLine()) { scanner.nextLine(); numLines++; } scanner.close(); } catch (FileNotFoundException e) { e.printStackTrace(); System.exit(0); } // read allocate the array. String [] result = new String[numLines-1]; // read and process each line and fill the array try { scanner = new Scanner(file); int i = 0; // loop index for array while (scanner.hasNextLine()) { result[i] = scanner.next().trim(); i++; } scanner.close(); } catch (FileNotFoundException e) { e.printStackTrace(); System.exit(0); } return result; } /* * Given an array of Strings, return the index of the word in the array if it occurs * Otherwise returns -1. * Needed by mostCommonWord and mostCommonWordExcludingStopWords. */ private int findPosition(String word, String [] list) { for(int i=0; i<=list.length; i++) { if(word.equals(list[i])) return i; } return -1; } /* * Argument: tweets: array of tweets * Returns: most common word, all words included */ public String mostCommonWord(String [] tweets) { String [] emptyStopWordList = null; return mostCommonWordExcludingStopWords(tweets, emptyStopWordList); } /* * Argument: tweets: array of white space delimited strings * Returns: most common word in tweets; excluding stopWords words */ public String mostCommonWordExcludingStopWords(String [] tweets, String [] stopWords) { int numWords = 0; String[] words = new String [numWords]; int[] counts = new int [numWords]; for(int i=0; i=0){ continue; } int index = findPosition(word, words); if (index >= 0){ counts[index]++; } else { String [] new_words = new String [numWords]; int [] new_counts = new int [numWords]; for (int j=0; j < numWords-1; j++){ new_words[j] = words[j]; new_counts[j] = counts[j]; } words = new_words; counts = new_counts; // An alternative to the explicit for loop is to use: //words = Arrays.copyOf(words, numWords); //counts = Arrays.copyOf(counts, numWords); words[numWords-1] = word; counts[numWords-1] = 1; } } } // Go over the word counts and find the index of the most common word. // If two counts are equal, returns the first word int maxCount = 0; int indexMostCommonWord = 0; for(int i=0; i < words.length; i++) { if(indexMostCommonWord < counts[i]) { indexMostCommonWord = i; maxCount = counts[i]; } } return words[indexMostCommonWord]; } public static void main(String[] args) { Recit4 p1 = new Recit4(); System.out.println("Reading tweets..."); String [] tweets = p1.readTweets("tweets.txt"); System.out.println("Number of tweets: " + tweets.length); for(int i=0; i