Raise the default value of `decode_size_limit_bytes` up to 512 MB (#46)

andsel · jsvd · web-flow · commit d907249dc072 · 2024-09-05T17:25:20.000+02:00
Every parsing of incoming data should be limited, to avoid OOM errors. The original 20MB limit may be too low in some circumstances. To avoid generating noise for users who legitimately parse big JSON lines, it is raised to 512MB.

Updates the default value of the `decode_size_limit_bytes` setting from 20MB to 512MB, and prints a deprecation log to inform the user that the default value will be lowered in a future version.

---------

Co-authored-by: João Duarte <jsvd@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 3.2.1
+  - Raise the default value of `decode_size_limit_bytes` up to 512 MB. [#46](https://github.com/logstash-plugins/logstash-codec-json_lines/pull/46)
+
 ## 3.2.0
   - Add decode_size_limit_bytes option to limit the maximum size of each JSON line that can be parsed.[#43](https://github.com/logstash-plugins/logstash-codec-json_lines/pull/43)
 
diff --git a/docs/index.asciidoc b/docs/index.asciidoc
@@ -35,6 +35,7 @@ Therefore this codec cannot work with line oriented inputs.
 |=======================================================================
 |Setting |Input type|Required
 | <<plugins-{type}s-{plugin}-charset>> |<<string,string>>, one of `["ASCII-8BIT", "UTF-8", "US-ASCII", "Big5", "Big5-HKSCS", "Big5-UAO", "CP949", "Emacs-Mule", "EUC-JP", "EUC-KR", "EUC-TW", "GB2312", "GB18030", "GBK", "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7", "ISO-8859-8", "ISO-8859-9", "ISO-8859-10", "ISO-8859-11", "ISO-8859-13", "ISO-8859-14", "ISO-8859-15", "ISO-8859-16", "KOI8-R", "KOI8-U", "Shift_JIS", "UTF-16BE", "UTF-16LE", "UTF-32BE", "UTF-32LE", "Windows-31J", "Windows-1250", "Windows-1251", "Windows-1252", "IBM437", "IBM737", "IBM775", "CP850", "IBM852", "CP852", "IBM855", "CP855", "IBM857", "IBM860", "IBM861", "IBM862", "IBM863", "IBM864", "IBM865", "IBM866", "IBM869", "Windows-1258", "GB1988", "macCentEuro", "macCroatian", "macCyrillic", "macGreek", "macIceland", "macRoman", "macRomania", "macThai", "macTurkish", "macUkraine", "CP950", "CP951", "IBM037", "stateless-ISO-2022-JP", "eucJP-ms", "CP51932", "EUC-JIS-2004", "GB12345", "ISO-2022-JP", "ISO-2022-JP-2", "CP50220", "CP50221", "Windows-1256", "Windows-1253", "Windows-1255", "Windows-1254", "TIS-620", "Windows-874", "Windows-1257", "MacJapanese", "UTF-7", "UTF8-MAC", "UTF-16", "UTF-32", "UTF8-DoCoMo", "SJIS-DoCoMo", "UTF8-KDDI", "SJIS-KDDI", "ISO-2022-JP-KDDI", "stateless-ISO-2022-JP-KDDI", "UTF8-SoftBank", "SJIS-SoftBank", "BINARY", "CP437", "CP737", "CP775", "IBM850", "CP857", "CP860", "CP861", "CP862", "CP863", "CP864", "CP865", "CP866", "CP869", "CP1258", "Big5-HKSCS:2008", "ebcdic-cp-us", "eucJP", "euc-jp-ms", "EUC-JISX0213", "eucKR", "eucTW", "EUC-CN", "eucCN", "CP936", "ISO2022-JP", "ISO2022-JP2", "ISO8859-1", "ISO8859-2", "ISO8859-3", "ISO8859-4", "ISO8859-5", "ISO8859-6", "CP1256", "ISO8859-7", "CP1253", "ISO8859-8", "CP1255", "ISO8859-9", "CP1254", "ISO8859-10", "ISO8859-11", "CP874", "ISO8859-13", "CP1257", "ISO8859-14", "ISO8859-15", "ISO8859-16", "CP878", "MacJapan", "ASCII", "ANSI_X3.4-1968", "646", "CP65000", "CP65001", 
"UTF-8-MAC", "UTF-8-HFS", "UCS-2BE", "UCS-4BE", "UCS-4LE", "CP932", "csWindows31J", "SJIS", "PCK", "CP1250", "CP1251", "CP1252", "external", "locale"]`|No
+| <<plugins-{type}s-{plugin}-decode_size_limit_bytes>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-delimiter>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-ecs_compatibility>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
@@ -58,6 +59,14 @@ actual encoding of the text and logstash will convert it for you.
 
 For nxlog users, you'll want to set this to `CP1252`
 
+[id="plugins-{type}s-{plugin}-decode_size_limit_bytes"]
+===== `decode_size_limit_bytes`
+
+* Value type is <<number,number>>
+* Default value is 512 MB
+
+Maximum number of bytes for a single line before processing stops.
+
 [id="plugins-{type}s-{plugin}-delimiter"]
 ===== `delimiter` 
 
diff --git a/lib/logstash/codecs/json_lines.rb b/lib/logstash/codecs/json_lines.rb
@@ -28,6 +28,8 @@ class LogStash::Codecs::JSONLines < LogStash::Codecs::Base
 
   config_name "json_lines"
 
+  DEFAULT_DECODE_SIZE_LIMIT_BYTES = 512 * (1024 * 1024)
+
   # The character encoding used in this codec. Examples include `UTF-8` and
   # `CP1252`
   #
@@ -43,9 +45,9 @@ class LogStash::Codecs::JSONLines < LogStash::Codecs::Base
   config :delimiter, :validate => :string, :default => "\n"
 
   # Maximum number of bytes for a single line before a fatal exception is raised
-  # which will stop Logsash.
-  # The default is 20MB which is quite large for a JSON document
-  config :decode_size_limit_bytes, :validate => :number, :default => 20 * (1024 * 1024) # 20MB
+  # which will stop Logstash.
+  # The default is 512MB which is quite large for a JSON document
+  config :decode_size_limit_bytes, :validate => :number, :default => DEFAULT_DECODE_SIZE_LIMIT_BYTES # 512MB
 
   # Defines a target field for placing decoded fields.
   # If this setting is omitted, data gets stored at the root (top level) of the event.
@@ -55,6 +57,9 @@ class LogStash::Codecs::JSONLines < LogStash::Codecs::Base
   public
 
   def register
+    if decode_size_limit_bytes == DEFAULT_DECODE_SIZE_LIMIT_BYTES
+      deprecation_logger.deprecated "The default value for `decode_size_limit_bytes`, currently at 512Mb, will be lowered in a future version to prevent Out of Memory errors from abnormally large messages or missing delimiters. Please set a value that reflects the largest expected message size (e.g. 20971520 for 20Mb)"
+    end
     @buffer = FileWatch::BufferedTokenizer.new(@delimiter, @decode_size_limit_bytes)
     @converter = LogStash::Util::Charset.new(@charset)
     @converter.logger = @logger
diff --git a/logstash-codec-json_lines.gemspec b/logstash-codec-json_lines.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name            = 'logstash-codec-json_lines'
-  s.version         = '3.2.0'
+  s.version         = '3.2.1'
   s.licenses        = ['Apache License (2.0)']
   s.summary         = "Reads and writes newline-delimited JSON"
   s.description     = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
diff --git a/spec/codecs/json_lines_spec.rb b/spec/codecs/json_lines_spec.rb
@@ -119,6 +119,7 @@
     end
 
     describe "decode_size_limits_bytes" do
+      let(:codec_options) { { "decode_size_limit_bytes" => 20 * 1024 * 1024 } } # lower the default to avoid OOM errors in tests
       let(:maximum_payload) { "a" * subject.decode_size_limit_bytes }
 
       it "should not raise an error if the number of bytes is not exceeded" do