|
| 1 | +/** |
| 2 | + * @file |
| 3 | + * @brief Printing the [words contained in a |
| 4 | + * file](http://www.dailyfreecode.com/Code/word-list-reads-text-file-makes-2050.aspx) |
| 5 | + * named `file.txt` in alphabetical order and also their frequencies into |
| 6 | + * another file "wordcount.txt" |
| 7 | + * @details |
| 8 | + * Given a file (`file.txt`) containing words (like a publication or a novel), |
| 9 | + * where words are separated by a space, newline, or underscore. |
| 10 | + * This program prints (writes or outputs) to another file (`wordcount.txt`), |
| 11 | + * the individual words contained in 'file.txt' with their frequencies (number |
| 12 | + * of occurrences) each on a newline and in alphabetical order. This program uses |
| 13 | + * the binary tree data structure to accomplish this task. |
| 14 | + * @author [Randy Kwalar](https://github.com/RandyKdev) |
| 15 | + */ |
| 16 | + |
| 17 | +#include <assert.h> /// for assert |
| 18 | +#include <ctype.h> /// for type checks |
| 19 | +#include <inttypes.h> /// for uint64_t based types, int64_t based types |
| 20 | +#include <stdbool.h> /// for boolean data type |
| 21 | +#include <stdio.h> /// for IO operations |
| 22 | +#include <stdlib.h> /// for memory allocation |
| 23 | +#include <string.h> /// for string operations |
| 24 | + |
/**
 * @brief One node of the binary tree in which the words are stored
 */
struct Node
{
    /** the word (value) held by this node */
    char *word;
    /** number of occurrences of the word */
    uint64_t frequency;
    /** pointer to the left child node */
    struct Node *left;
    /** pointer to the right child node */
    struct Node *right;
};
| 35 | + |
/**
 * @brief Reports a fatal error and terminates the program
 * @param errorMessage text written to `stderr`; a newline is appended to it
 * @returns void (control does not come back to the caller: the process exits
 * with `EXIT_FAILURE`)
 */
void endProgramAbruptly(char *errorMessage)
{
    // message first, then the terminating newline
    fputs(errorMessage, stderr);
    fputc('\n', stderr);

    exit(EXIT_FAILURE);
}
| 46 | + |
| 47 | +/** |
| 48 | + * @brief Frees memory when program is terminating |
| 49 | + * @param node pointer to current node |
| 50 | + * @returns void |
| 51 | + */ |
| 52 | +void freeTreeMemory(struct Node *node) |
| 53 | +{ |
| 54 | + if (node != NULL) |
| 55 | + { |
| 56 | + freeTreeMemory(node->left); |
| 57 | + freeTreeMemory(node->right); |
| 58 | + free(node->word); // freeing node->word because memory was allocated |
| 59 | + // using malloc |
| 60 | + free(node); // freeing node because memory was allocated using malloc |
| 61 | + } |
| 62 | +} |
| 63 | + |
/**
 * @brief Makes a heap-allocated copy of a word
 * @param word NUL-terminated word to be copied
 * @returns a pointer to the newly allocated copy if memory IS reserved
 * @returns `NULL` if memory is NOT reserved; `endProgramAbruptly` is called
 * first, so this value is only seen if that call returns
 */
char *getPointerToWord(char *word)
{
    size_t bytesNeeded = strlen(word) + 1;  // + 1 is for the '\0' character
    char *copy = malloc(bytesNeeded);

    if (copy == NULL)
    {
        endProgramAbruptly(
            "\nA problem occurred while reserving memory for the word\n");
        return NULL;
    }

    memcpy(copy, word, bytesNeeded);  // copies the terminator as well
    return copy;
}
| 84 | + |
/**
 * @brief Closes a file and ends the program if closing fails
 * @param file pointer to the file to be closed
 * @returns void
 */
void closeFile(FILE *file)
{
    int closeStatus = fclose(file);  // non-zero signals a failed close

    if (closeStatus != 0)
    {
        endProgramAbruptly("\nA Problem Occurred while closing a file\n");
    }
}
| 96 | + |
| 97 | +/** |
| 98 | + * @brief Reserves memory for new node |
| 99 | + * @returns a pointer to the newly allocated node if memory IS successfully reserved |
| 100 | + * @returns `NULL` if memory is NOT reserved |
| 101 | + */ |
| 102 | +struct Node *allocateMemoryForNode() |
| 103 | +{ |
| 104 | + struct Node *node = |
| 105 | + (struct Node *)malloc(sizeof(struct Node)); ///< pointer to the node |
| 106 | + if (node != NULL) |
| 107 | + { |
| 108 | + return node; |
| 109 | + } |
| 110 | + endProgramAbruptly( |
| 111 | + "\nA problem occurred while reserving memory for the structure\n"); |
| 112 | + return NULL; |
| 113 | +} |
| 114 | + |
| 115 | +/** |
| 116 | + * @brief Writes contents of tree to another file alphabetically |
| 117 | + * @param node pointer to current node |
| 118 | + * @param file pointer to file |
| 119 | + * @returns void |
| 120 | + */ |
| 121 | +void writeContentOfTreeToFile(struct Node *node, FILE *file) |
| 122 | +{ |
| 123 | + static uint64_t i = 1; ///< for word numbering in the write file |
| 124 | + if (node != NULL) // checks if the node is valid |
| 125 | + { |
| 126 | + writeContentOfTreeToFile( |
| 127 | + node->left, |
| 128 | + file); // calls `writeContentOfTreeToFile` for left sub tree |
| 129 | + fprintf(file, "%-5lu \t %-9lu \t %s \n", i++, node->frequency, |
| 130 | + node->word); // prints the word number, word frequency and word |
| 131 | + // in tabular format to the file |
| 132 | + writeContentOfTreeToFile( |
| 133 | + node->right, |
| 134 | + file); // calls `writeContentOfTreeToFile` for right sub tree |
| 135 | + } |
| 136 | +} |
| 137 | + |
| 138 | +/** |
| 139 | + * @brief Adds word (node) to the correct position in tree |
| 140 | + * @param word word to be inserted in to the tree |
| 141 | + * @param currentNode node which is being compared |
| 142 | + * @returns a pointer to the root node |
| 143 | + */ |
| 144 | +struct Node *addWordToTree(char *word, struct Node *currentNode) |
| 145 | +{ |
| 146 | + if (currentNode == NULL) // checks if `currentNode` is `NULL` |
| 147 | + { |
| 148 | + struct Node *currentNode = |
| 149 | + allocateMemoryForNode(); // allocates memory for new node |
| 150 | + currentNode->word = getPointerToWord(word); // stores `word` in memory |
| 151 | + currentNode->frequency = 1; // initializes the word frequency to 1 |
| 152 | + currentNode->left = NULL; // sets left node to `NULL` |
| 153 | + currentNode->right = NULL; // sets right node to `NULL` |
| 154 | + return currentNode; // returns pointer to newly created node |
| 155 | + } |
| 156 | + |
| 157 | + int64_t compared = strcmp(word, currentNode->word); ///< holds compare state |
| 158 | + |
| 159 | + if (compared > 0) { |
| 160 | + currentNode->right = addWordToTree(word, |
| 161 | + currentNode->right); // adds `word` to right sub tree if `word` is |
| 162 | + // alphabetically greater than `currentNode->word` |
| 163 | + } |
| 164 | + else if (compared < 0) { |
| 165 | + currentNode->left = addWordToTree(word, |
| 166 | + currentNode->left); // adds `word` to left sub tree if `word` is |
| 167 | + // alphabetically less than `currentNode->word` |
| 168 | + } |
| 169 | + else { |
| 170 | + currentNode->frequency++; // increments `currentNode` frequency if `word` is the same as `currentNode->word` |
| 171 | + } |
| 172 | + |
| 173 | + return currentNode; // returns pointer to current node |
| 174 | +} |
| 175 | + |
/**
 * @brief Finishes the word collected so far and adds it to the tree
 * @details A collected word consists of lowercase letters, `'` and `-` only.
 * A trailing `-` is dropped (eg `yours-` becomes `yours`), a trailing `'` is
 * kept (eg `persons'`). Nothing is added when no character was collected.
 * @param buffer storage holding the collected characters; must have room for
 * the terminating '\0' at index `length`
 * @param length number of characters collected in `buffer`
 * @param root root node of tree
 * @returns a pointer to the root node
 */
static struct Node *addCollectedWordToTree(char *buffer, size_t length,
                                           struct Node *root)
{
    if (length > 0 && buffer[length - 1] == '-')
    {
        length--;
    }
    if (length == 0)
    {
        return root;
    }
    buffer[length] = '\0';
    return addWordToTree(buffer, root);
}

/**
 * @brief Reads words from file to tree
 * @details Letters are converted to lowercase. `'` and `-` are part of a word
 * only when they directly follow a letter; every other character ends the
 * current word. The word buffer grows on demand, so words of any length are
 * accepted.
 * @param file file to be read from
 * @param root root node of tree
 * @returns a pointer to the root node
 */
struct Node *readWordsInFileToTree(FILE *file, struct Node *root)
{
    // longest english word = 45 chars
    // +1 for '\0' = 46 chars; the buffer is enlarged for longer input
    size_t capacity = 46;
    char *inputString = malloc(capacity);  // the word being collected
    if (inputString == NULL)
    {
        endProgramAbruptly(
            "\nA problem occurred while reserving memory for the word\n");
        return root;
    }

    int inputChar;   // int, not char, so that EOF differs from every byte
    size_t pos = 0;  // position in inputString to place the inputChar

    while ((inputChar = fgetc(file)) != EOF)
    {
        // ctype functions need a value representable as unsigned char
        bool isPrevCharAlpha =
            pos > 0 && isalpha((unsigned char)inputString[pos - 1]);
        bool isLetter = isalpha(inputChar) != 0;
        // ' or - preceded by a letter eg yours-not, persons' (valid)
        bool isJoiner =
            (inputChar == '\'' || inputChar == '-') && isPrevCharAlpha;

        if (!isLetter && !isJoiner)
        {
            // separator: whatever was collected is a complete word
            root = addCollectedWordToTree(inputString, pos, root);
            pos = 0;
            continue;
        }

        // keep room for this character and the terminating '\0'
        if (pos + 1 >= capacity)
        {
            char *grown = NULL;
            if (capacity <= SIZE_MAX / 2)
            {
                grown = realloc(inputString, capacity * 2);
            }
            if (grown == NULL)
            {
                free(inputString);
                endProgramAbruptly(
                    "\nA problem occurred while reserving memory for the "
                    "word\n");
                return root;
            }
            inputString = grown;
            capacity *= 2;
        }

        inputString[pos++] =
            isLetter ? (char)tolower(inputChar) : (char)inputChar;
    }

    // this is to catch the case for the EOF being immediately after the last
    // character of a word
    root = addCollectedWordToTree(inputString, pos, root);

    free(inputString);
    return root;
}
| 239 | + |
| 240 | +/** |
| 241 | + * @brief Self-test implementations |
| 242 | + * @returns void |
| 243 | + */ |
| 244 | +static void test() |
| 245 | +{ |
| 246 | + struct Node *root = NULL; ///< pointer to the root node |
| 247 | + FILE *file = NULL; ///< pointer to the file |
| 248 | + |
| 249 | + file = fopen("file.txt", "w"); // creates test file in write mode |
| 250 | + |
| 251 | + fprintf(file, |
| 252 | + "hey_this, is a. test input \n to a_file"); // writes test data to |
| 253 | + // test file |
| 254 | + |
| 255 | + closeFile(file); // closes test file |
| 256 | + file = fopen("file.txt", "r"); // reopens test file in read mode |
| 257 | + |
| 258 | + root = readWordsInFileToTree(file, |
| 259 | + root); // reads words from test file to tree |
| 260 | + |
| 261 | + // Tests to check if words were added to correct position in tree and also |
| 262 | + // if their frequencies were added correctly |
| 263 | + assert(strcmp(root->word, "hey") == 0); |
| 264 | + assert(root->frequency == 1); |
| 265 | + assert(strcmp(root->left->word, "a") == 0); |
| 266 | + assert(root->left->frequency == 2); |
| 267 | + assert(strcmp(root->right->word, "this") == 0); |
| 268 | + assert(strcmp(root->left->right->word, "file") == 0); |
| 269 | + assert(strcmp(root->right->left->word, "is") == 0); |
| 270 | + |
| 271 | + closeFile(file); // closes test file |
| 272 | + remove("file.txt"); // deletes test file from storage |
| 273 | + |
| 274 | + file = fopen("wordcount.txt", "a"); // creates write file |
| 275 | + fprintf(file, "%-5s \t %9s \t %s \n", "S/N", "FREQUENCY", |
| 276 | + "WORD"); // prints the heading to `wordcount.txt` |
| 277 | + writeContentOfTreeToFile( |
| 278 | + root, file); // writes content of tree to file (`wordcount.txt`) |
| 279 | + |
| 280 | + // Here is how the output to `wordcount.txt` should look like |
| 281 | + char *correctString = |
| 282 | + "S/N FREQUENCY WORD \n" |
| 283 | + "1 2 a \n" |
| 284 | + "2 1 file \n" |
| 285 | + "3 1 hey \n" |
| 286 | + "4 1 input \n" |
| 287 | + "5 1 is \n" |
| 288 | + "6 1 n \n" |
| 289 | + "7 1 test \n" |
| 290 | + "8 1 this \n" |
| 291 | + "9 1 to \n"; |
| 292 | + |
| 293 | + int16_t inputChar; // holds the current character in `wordcount.txt` |
| 294 | + uint64_t i = 0; // holds the current index in `correctString` |
| 295 | + |
| 296 | + // Checks if the content in `wordcount.txt` is as expected (the same as in |
| 297 | + // `correctString`) |
| 298 | + while ((inputChar = fgetc(file)) != EOF) { |
| 299 | + assert(inputChar == correctString[i++]); |
| 300 | + } |
| 301 | + |
| 302 | + closeFile(file); // closes `wordcount.txt` |
| 303 | + remove("wordcount.txt"); // deletes `wordcount.txt` |
| 304 | + |
| 305 | + freeTreeMemory(root); // frees memory taken up by the tree |
| 306 | +} |
| 307 | + |
/**
 * @brief Program entry point
 * @returns 0 on exit
 */
int main()
{
    // the self-test implementations are the only work this program performs
    test();

    return 0;
}
0 commit comments