-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
/
Copy pathwords_alphabetical.c
316 lines (281 loc) · 10.3 KB
/
words_alphabetical.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
/**
* @file
* @brief Printing the [words contained in a
* file](http://www.dailyfreecode.com/Code/word-list-reads-text-file-makes-2050.aspx)
* named `file.txt` in alphabetical order and also their frequencies in to
* another file "wordcount.txt"
* @details
* Given a file (`file.txt`) containing words (like a publication or a novel),
* where words are separated by a space, newline, or underscore.
* This program prints (writes or outputs) to another file (`wordcount.txt`),
* the individual words contained in 'file.txt' with their frequencies (number
* of occurrences) each on a newline and in alphabetical order. This program uses
* the binary tree data structure to accomplish this task.
* @author [Randy Kwalar](https://github.com/RandyKdev)
*/
#include <assert.h> /// for assert
#include <ctype.h> /// for type checks
#include <inttypes.h> /// for uint64_t based types, int64_t based types
#include <stdbool.h> /// for boolean data type
#include <stdio.h> /// for IO operations
#include <stdlib.h> /// for memory allocation
#include <string.h> /// for string operations
/**
* @brief structure defining a node in the binary tree
*/
struct Node
{
char *word; ///< the word (value) of the node
uint64_t frequency; ///< number of occurrences of the word
struct Node *left; ///< pointer to the left child node
struct Node *right; ///< pointer to the right child node
};
/**
* @brief Ends program due to an error
* @param errorMessage the error message to be printed
* @returns void
*/
void endProgramAbruptly(char *errorMessage)
{
fprintf(stderr, "%s\n", errorMessage);
exit(EXIT_FAILURE);
}
/**
* @brief Frees memory when program is terminating
* @param node pointer to current node
* @returns void
*/
void freeTreeMemory(struct Node *node)
{
if (node != NULL)
{
freeTreeMemory(node->left);
freeTreeMemory(node->right);
free(node->word); // freeing node->word because memory was allocated
// using malloc
free(node); // freeing node because memory was allocated using malloc
}
}
/**
* @brief Stores word in memory
* @param word word to be stored in memory
* @returns a pointer to the newly allocated word if the word IS stored successfully
* @returns `NULL` if the word is NOT stored
*/
char *getPointerToWord(char *word)
{
char *string =
(char *)malloc((strlen(word) + 1) * sizeof(char)); ///< pointer to string
// + 1 is for the '\0' character
if (string != NULL)
{
strcpy(string, word);
return string;
}
endProgramAbruptly(
"\nA problem occurred while reserving memory for the word\n");
return NULL;
}
/**
* @brief Closes the file after reading or writing
* @param file pointer to the file to be closed
* @returns void
*/
void closeFile(FILE *file)
{
if (fclose(file)) {
endProgramAbruptly("\nA Problem Occurred while closing a file\n");
}
}
/**
* @brief Reserves memory for new node
* @returns a pointer to the newly allocated node if memory IS successfully reserved
* @returns `NULL` if memory is NOT reserved
*/
struct Node *allocateMemoryForNode()
{
struct Node *node =
(struct Node *)malloc(sizeof(struct Node)); ///< pointer to the node
if (node != NULL)
{
return node;
}
endProgramAbruptly(
"\nA problem occurred while reserving memory for the structure\n");
return NULL;
}
/**
* @brief Writes contents of tree to another file alphabetically
* @param node pointer to current node
* @param file pointer to file
* @returns void
*/
void writeContentOfTreeToFile(struct Node *node, FILE *file)
{
static uint64_t i = 1; ///< for word numbering in the write file
if (node != NULL) // checks if the node is valid
{
writeContentOfTreeToFile(
node->left,
file); // calls `writeContentOfTreeToFile` for left sub tree
fprintf(file, "%-5lu \t %-9lu \t %s \n", i++, node->frequency,
node->word); // prints the word number, word frequency and word
// in tabular format to the file
writeContentOfTreeToFile(
node->right,
file); // calls `writeContentOfTreeToFile` for right sub tree
}
}
/**
* @brief Adds word (node) to the correct position in tree
* @param word word to be inserted in to the tree
* @param currentNode node which is being compared
* @returns a pointer to the root node
*/
struct Node *addWordToTree(char *word, struct Node *currentNode)
{
if (currentNode == NULL) // checks if `currentNode` is `NULL`
{
struct Node *currentNode =
allocateMemoryForNode(); // allocates memory for new node
currentNode->word = getPointerToWord(word); // stores `word` in memory
currentNode->frequency = 1; // initializes the word frequency to 1
currentNode->left = NULL; // sets left node to `NULL`
currentNode->right = NULL; // sets right node to `NULL`
return currentNode; // returns pointer to newly created node
}
int64_t compared = strcmp(word, currentNode->word); ///< holds compare state
if (compared > 0) {
currentNode->right = addWordToTree(word,
currentNode->right); // adds `word` to right sub tree if `word` is
// alphabetically greater than `currentNode->word`
}
else if (compared < 0) {
currentNode->left = addWordToTree(word,
currentNode->left); // adds `word` to left sub tree if `word` is
// alphabetically less than `currentNode->word`
}
else {
currentNode->frequency++; // increments `currentNode` frequency if `word` is the same as `currentNode->word`
}
return currentNode; // returns pointer to current node
}
/**
* @brief Reads words from file to tree
* @param file file to be read from
* @param root root node of tree
* @returns a pointer to the root node
*/
struct Node *readWordsInFileToTree(FILE *file, struct Node *root)
{
// longest english word = 45 chars
// +1 for '\0' = 46 chars
char *inputString =
(char *)malloc(46 * sizeof(char)); ///< pointer to the input string
char inputChar; ///< temp storage of characters
bool isPrevCharAlpha = false; ///< bool to mark the end of a word
uint8_t pos = 0; ///< position in inputString to place the inputChar
while ((inputChar = fgetc(file)) != EOF)
{
if (pos > 0)
isPrevCharAlpha = isalpha(inputString[pos - 1]);
// checks if character is letter
if (isalpha(inputChar))
{
inputString[pos++] = tolower(inputChar);
continue;
}
// checks if character is ' or - and if it is preceded by a letter eg
// yours-not, persons' (valid)
if ((inputChar == '\'' || inputChar == '-') && isPrevCharAlpha)
{
inputString[pos++] = inputChar;
continue;
}
// makes sure that there is something valid in inputString
if (pos == 0)
continue;
// if last character is not letter and is not ' then replace by \0
if (!isPrevCharAlpha && inputString[pos - 1] != '\'')
pos--;
inputString[pos] = '\0';
pos = 0;
isPrevCharAlpha = false;
root = addWordToTree(inputString, root);
}
// this is to catch the case for the EOF being immediately after the last
// letter or '
if (pos > 0)
{
if (!isPrevCharAlpha && inputString[pos - 1] != '\'')
pos--;
inputString[pos] = '\0';
root = addWordToTree(inputString, root);
}
free(inputString);
return root;
}
/**
* @brief Self-test implementations
* @returns void
*/
static void test()
{
struct Node *root = NULL; ///< pointer to the root node
FILE *file = NULL; ///< pointer to the file
file = fopen("file.txt", "w"); // creates test file in write mode
fprintf(file,
"hey_this, is a. test input \n to a_file"); // writes test data to
// test file
closeFile(file); // closes test file
file = fopen("file.txt", "r"); // reopens test file in read mode
root = readWordsInFileToTree(file,
root); // reads words from test file to tree
// Tests to check if words were added to correct position in tree and also
// if their frequencies were added correctly
assert(strcmp(root->word, "hey") == 0);
assert(root->frequency == 1);
assert(strcmp(root->left->word, "a") == 0);
assert(root->left->frequency == 2);
assert(strcmp(root->right->word, "this") == 0);
assert(strcmp(root->left->right->word, "file") == 0);
assert(strcmp(root->right->left->word, "is") == 0);
closeFile(file); // closes test file
remove("file.txt"); // deletes test file from storage
file = fopen("wordcount.txt", "a"); // creates write file
fprintf(file, "%-5s \t %9s \t %s \n", "S/N", "FREQUENCY",
"WORD"); // prints the heading to `wordcount.txt`
writeContentOfTreeToFile(
root, file); // writes content of tree to file (`wordcount.txt`)
// Here is how the output to `wordcount.txt` should look like
char *correctString =
"S/N FREQUENCY WORD \n"
"1 2 a \n"
"2 1 file \n"
"3 1 hey \n"
"4 1 input \n"
"5 1 is \n"
"6 1 n \n"
"7 1 test \n"
"8 1 this \n"
"9 1 to \n";
int16_t inputChar; // holds the current character in `wordcount.txt`
uint64_t i = 0; // holds the current index in `correctString`
// Checks if the content in `wordcount.txt` is as expected (the same as in
// `correctString`)
while ((inputChar = fgetc(file)) != EOF) {
assert(inputChar == correctString[i++]);
}
closeFile(file); // closes `wordcount.txt`
remove("wordcount.txt"); // deletes `wordcount.txt`
freeTreeMemory(root); // frees memory taken up by the tree
}
/**
* @brief Main function
* @returns 0 on exit
*/
int main()
{
test(); // run self-test implementations
return 0;
}