|
# Fixed dimensions of the word-id matrix built by getSentenceMatrix.
batchSize = 24  # number of rows in the matrix
maxSeqLength = 250  # number of columns, i.e. word slots per row
| 3 | + |
| 4 | +import numpy as np |
| 5 | +import tensorflow as tf |
| 6 | +import re |
| 7 | + |
# Vocabulary: the saved entries are byte strings, so each one is decoded to
# text. A word's position in this list is the integer id used for it.
wordsList = [
    word.decode('UTF-8')
    for word in np.load('other_data/wordsList.npy').tolist()
]

# Loaded next to the vocabulary; presumably one embedding row per vocabulary
# entry -- TODO confirm against the code that consumes it.
wordVectors = np.load('other_data/wordVectors.npy')

# Matches every run of characters other than ASCII letters, digits and spaces.
strip_special_chars = re.compile("[^A-Za-z0-9 ]+")
| 12 | + |
def cleanSentences(string):
    """Normalize raw text before vocabulary lookup.

    The text is lower-cased, every literal "<br />" tag is replaced by a
    single space, and all characters matched by ``strip_special_chars``
    (anything other than ASCII letters, digits and spaces) are removed.

    Args:
        string: The raw text to clean.

    Returns:
        The cleaned, lower-cased text.
    """
    # Lower-casing first means upper-case "<BR />" tags are replaced as well;
    # a single lower() call is enough, so the redundant second one is gone.
    lowered = string.lower().replace("<br />", " ")
    return strip_special_chars.sub("", lowered)
| 16 | + |
def getSentenceMatrix(sentence):
    """Encode a sentence as a matrix of vocabulary ids.

    The sentence is cleaned with ``cleanSentences`` and split on whitespace.
    Each word's position in ``wordsList`` is written into row 0 of the
    result, one word per column. All other rows, and any unused trailing
    columns of row 0, stay 0.

    Args:
        sentence: Raw text to encode.

    Returns:
        An int32 array of shape ``(batchSize, maxSeqLength)``.
    """
    sentenceMatrix = np.zeros([batchSize, maxSeqLength], dtype='int32')
    words = cleanSentences(sentence).split()
    # Keep only the words that fit in one row; writing past column
    # maxSeqLength - 1 would raise IndexError on long input.
    for indexCounter, word in enumerate(words[:maxSeqLength]):
        try:
            sentenceMatrix[0, indexCounter] = wordsList.index(word)
        except ValueError:
            # Word is not in the vocabulary. 399999 is the id used for unknown
            # words -- presumably the last entry of a 400000-word vocabulary;
            # TODO confirm against wordsList.
            sentenceMatrix[0, indexCounter] = 399999
    return sentenceMatrix
| 28 | + |
# Encode a negative example sentence, show the matrix and its shape, and save
# it as comma-separated integers.
# A parenthesized single-argument print gives the same output whether it is
# parsed as a Python 2 statement or a Python 3 function call.
inputText = "That movie was terrible."
inputMatrix = getSentenceMatrix(inputText)
print(inputMatrix)
print(inputMatrix.shape)
np.savetxt("inputMatrixNegative.csv", inputMatrix, delimiter=',', fmt="%i")

# Same steps for a positive example sentence.
secondInputText = "That movie was the best one I have ever seen."
secondInputMatrix = getSentenceMatrix(secondInputText)
print(secondInputMatrix)
print(secondInputMatrix.shape)
np.savetxt("inputMatrixPositive.csv", secondInputMatrix, delimiter=',', fmt="%i")
0 commit comments