|
111 | 111 | - match: { tokens.0.position: 0 }
|
112 | 112 | - match: { tokens.1.token: "line" }
|
113 | 113 | - match: { tokens.1.start_offset: 10 }
|
114 |
| - - match: { tokens.1.end_offset: 14 } |
| 114 | + - match: { tokens.1.end_offset: 26 } |
115 | 115 | - match: { tokens.1.position: 1 }
|
| 116 | + |
---
"Test 7.14 analyzer with multiple multiline messages":
  # Analyzes two text values in a single request. Each value is a
  # whitespace-only first line followed by two lines of text.
  #
  # NOTE: the three spaces before the first "\n" in each text value are
  # load-bearing. Every offset asserted below counts them
  # (3 spaces + "\n" puts "first" at offset 4), so do not trim or
  # collapse that whitespace.
  - do:
      indices.analyze:
        body: >
          {
            "char_filter" : [
              "first_non_blank_line"
            ],
            "tokenizer" : "ml_standard",
            "filter" : [
              { "type" : "stop", "stopwords": [
                "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
                "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun",
                "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December",
                "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
                "GMT", "UTC"
              ] }
            ],
            "text" : [
              "   \nfirst line\nsecond line",
              "   \nfirst line of second message\nsecond line of second message"
            ]
          }
  # First text value (26 characters long). Only "first" and "line" are
  # asserted for it; the next asserted token already belongs to the second
  # value.
  - match: { tokens.0.token: "first" }
  - match: { tokens.0.start_offset: 4 }
  - match: { tokens.0.end_offset: 9 }
  - match: { tokens.0.position: 0 }
  - match: { tokens.1.token: "line" }
  - match: { tokens.1.start_offset: 10 }
  # 26 is the length of the whole first value, not the end of the word
  # "line" (which would be 14) - presumably the char filter's offset
  # correction maps the end of the kept line to the end of the original
  # input; confirm against the char filter implementation.
  - match: { tokens.1.end_offset: 26 }
  - match: { tokens.1.position: 1 }
  # Second text value (62 characters long). Its offsets are shifted by 27
  # (26 + 1) and its positions by 102 (last position 1 + 101) relative to a
  # standalone analysis - presumably the analyze API's gaps between
  # successive text values; confirm against the analyze API documentation.
  - match: { tokens.2.token: "first" }
  - match: { tokens.2.start_offset: 31 }
  - match: { tokens.2.end_offset: 36 }
  - match: { tokens.2.position: 102 }
  - match: { tokens.3.token: "line" }
  - match: { tokens.3.start_offset: 37 }
  - match: { tokens.3.end_offset: 41 }
  - match: { tokens.3.position: 103 }
  - match: { tokens.4.token: "of" }
  - match: { tokens.4.start_offset: 42 }
  - match: { tokens.4.end_offset: 44 }
  - match: { tokens.4.position: 104 }
  - match: { tokens.5.token: "second" }
  - match: { tokens.5.start_offset: 45 }
  - match: { tokens.5.end_offset: 51 }
  - match: { tokens.5.position: 105 }
  - match: { tokens.6.token: "message" }
  - match: { tokens.6.start_offset: 52 }
  # 89 = 27 + 62: as with the first value, the last token's end offset is
  # the end of the whole second value.
  - match: { tokens.6.end_offset: 89 }
  - match: { tokens.6.position: 106 }
| 169 | + |
---
"Test 7.14 analyzer with stop words in messages":
  # Analyzes two single-line messages that each start with a timestamp of
  # the form "May 27, 2021 @ HH:MM:SS.mmm UTC" followed by three words.
  # "May" and "UTC" are both in the stop word list configured below.
  - do:
      indices.analyze:
        body: >
          {
            "char_filter" : [
              "first_non_blank_line"
            ],
            "tokenizer" : "ml_standard",
            "filter" : [
              { "type" : "stop", "stopwords": [
                "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
                "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun",
                "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December",
                "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
                "GMT", "UTC"
              ] }
            ],
            "text" : [
              "May 27, 2021 @ 19:51:15.288 UTC log message one",
              "May 27, 2021 @ 19:52:25.288 UTC log message two"
            ]
          }
  # First message (47 characters). The first returned token is "log" at
  # position 7, so nothing from the timestamp prefix is returned as a token
  # even though positions 0-6 are consumed.
  # NOTE(review): the stop filter accounts for "May" and "UTC"; the numeric
  # parts are presumably dropped by the ml_standard tokenizer - confirm.
  - match: { tokens.0.token: "log" }
  - match: { tokens.0.start_offset: 32 }
  - match: { tokens.0.end_offset: 35 }
  - match: { tokens.0.position: 7 }
  - match: { tokens.1.token: "message" }
  - match: { tokens.1.start_offset: 36 }
  - match: { tokens.1.end_offset: 43 }
  - match: { tokens.1.position: 8 }
  - match: { tokens.2.token: "one" }
  - match: { tokens.2.start_offset: 44 }
  - match: { tokens.2.end_offset: 47 }
  - match: { tokens.2.position: 9 }
  # Second message. Same layout as the first, with offsets shifted by 48
  # (47 + 1) and positions shifted by 110 (last position 9 + 101).
  - match: { tokens.3.token: "log" }
  - match: { tokens.3.start_offset: 80 }
  - match: { tokens.3.end_offset: 83 }
  - match: { tokens.3.position: 117 }
  - match: { tokens.4.token: "message" }
  - match: { tokens.4.start_offset: 84 }
  - match: { tokens.4.end_offset: 91 }
  - match: { tokens.4.position: 118 }
  - match: { tokens.5.token: "two" }
  - match: { tokens.5.start_offset: 92 }
  - match: { tokens.5.end_offset: 95 }
  - match: { tokens.5.position: 119 }
0 commit comments