diff --git a/.travis.yml b/.travis.yml index 1458a3b..b5cfb80 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,18 +1,16 @@ --- -sudo: false -language: ruby -cache: bundler +sudo: required +services: docker +addons: + apt: + packages: + - docker-ce matrix: include: - - rvm: jruby-9.1.13.0 - env: LOGSTASH_BRANCH=master - - rvm: jruby-9.1.13.0 - env: LOGSTASH_BRANCH=6.x - - rvm: jruby-9.1.13.0 - env: LOGSTASH_BRANCH=6.0 - - rvm: jruby-1.7.27 - env: LOGSTASH_BRANCH=5.6 + - env: ELASTIC_STACK_VERSION=5.6.10 + - env: ELASTIC_STACK_VERSION=6.3.0 + - env: ELASTIC_STACK_VERSION=6.4.0-SNAPSHOT + - env: ELASTIC_STACK_VERSION=7.0.0-alpha1-SNAPSHOT fast_finish: true -install: true -script: ci/build.sh -jdk: oraclejdk8 +install: ci/unit/docker-setup.sh +script: ci/unit/docker-run.sh diff --git a/JAR_VERSION b/JAR_VERSION index 3eefcb9..7dea76e 100644 --- a/JAR_VERSION +++ b/JAR_VERSION @@ -1 +1 @@ -1.0.0 +1.0.1 diff --git a/README.md b/README.md index a48c73a..5153942 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,6 @@ Travis Build [![Travis Build Status](https://travis-ci.org/logstash-plugins/logstash-input-file.svg)](https://travis-ci.org/logstash-plugins/logstash-input-file) -Jenkins Build -[![Travis Build Status](https://travis-ci.org/logstash-plugins/logstash-input-file.svg)](https://travis-ci.org/logstash-plugins/logstash-input-file) - This is a plugin for [Logstash](https://github.com/elastic/logstash). It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way. diff --git a/ci/build.sh b/ci/build.sh deleted file mode 100755 index 06caffd..0000000 --- a/ci/build.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -# version: 1 -######################################################## -# -# AUTOMATICALLY GENERATED! DO NOT EDIT -# -######################################################## -set -e - -echo "Starting build process in: `pwd`" -source ./ci/setup.sh - -if [[ -f "ci/run.sh" ]]; then - echo "Running custom build script in: `pwd`/ci/run.sh" - source ./ci/run.sh -else - echo "Running default build scripts in: `pwd`/ci/build.sh" - bundle install - bundle exec rake vendor - bundle exec rspec spec -fi diff --git a/ci/setup.sh b/ci/setup.sh deleted file mode 100755 index 835fa43..0000000 --- a/ci/setup.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# version: 1 -######################################################## -# -# AUTOMATICALLY GENERATED! 
DO NOT EDIT -# -######################################################## -set -e -if [ "$LOGSTASH_BRANCH" ]; then - echo "Building plugin using Logstash source" - BASE_DIR=`pwd` - echo "Checking out branch: $LOGSTASH_BRANCH" - git clone -b $LOGSTASH_BRANCH https://github.com/elastic/logstash.git ../../logstash --depth 1 - printf "Checked out Logstash revision: %s\n" "$(git -C ../../logstash rev-parse HEAD)" - cd ../../logstash - echo "Building plugins with Logstash version:" - cat versions.yml - echo "---" - # We need to build the jars for that specific version - echo "Running gradle assemble in: `pwd`" - ./gradlew assemble - cd $BASE_DIR - export LOGSTASH_SOURCE=1 -else - echo "Building plugin using released gems on rubygems" -fi diff --git a/ci/unit/Dockerfile b/ci/unit/Dockerfile new file mode 100644 index 0000000..b2fc1c6 --- /dev/null +++ b/ci/unit/Dockerfile @@ -0,0 +1,11 @@ +ARG ELASTIC_STACK_VERSION +FROM docker.elastic.co/logstash/logstash:$ELASTIC_STACK_VERSION +WORKDIR /usr/share/logstash/logstash-core +RUN cp versions-gem-copy.yml ../logstash-core-plugin-api/versions-gem-copy.yml +COPY --chown=logstash:logstash . /usr/share/plugins/this +WORKDIR /usr/share/plugins/this +ENV PATH=/usr/share/logstash/vendor/jruby/bin:${PATH} +ENV LOGSTASH_SOURCE 1 +RUN jruby -S gem install bundler +RUN jruby -S bundle install --jobs=3 --retry=3 +RUN jruby -S bundle exec rake vendor diff --git a/ci/unit/docker-compose.yml b/ci/unit/docker-compose.yml new file mode 100644 index 0000000..1dd26fd --- /dev/null +++ b/ci/unit/docker-compose.yml @@ -0,0 +1,17 @@ +version: '3' + +# run tests: docker-compose -f ci/unit/docker-compose.yml up --build --force-recreate +# only set up: docker-compose -f ci/unit/docker-compose.yml up --build --no-start --force-recreate +# start manually: docker-compose -f ci/unit/docker-compose.yml run logstash +services: + logstash: + build: + context: ../../ + dockerfile: ci/unit/Dockerfile + args: + - ELASTIC_STACK_VERSION=$ELASTIC_STACK_VERSION + command: /usr/share/plugins/this/ci/unit/run.sh + environment: + LS_JAVA_OPTS: "-Xmx256m -Xms256m" + OSS: "true" + tty: true diff --git a/ci/unit/docker-run.sh b/ci/unit/docker-run.sh new file mode 100755 index 0000000..4007c55 --- /dev/null +++ b/ci/unit/docker-run.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# This is intended to be run from the plugin's root directory: `ci/unit/docker-run.sh` +# Ensure you have Docker installed locally and set the ELASTIC_STACK_VERSION environment variable. +set -e + +docker-compose -f ci/unit/docker-compose.yml run logstash diff --git a/ci/unit/docker-setup.sh b/ci/unit/docker-setup.sh new file mode 100755 index 0000000..37bbcd0 --- /dev/null +++ b/ci/unit/docker-setup.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# This is intended to be run from the plugin's root directory: `ci/unit/docker-setup.sh` +# Ensure you have Docker installed locally and set the ELASTIC_STACK_VERSION environment variable.
+set -e + +if [ "$ELASTIC_STACK_VERSION" ]; then + echo "Testing against version: $ELASTIC_STACK_VERSION" + + if [[ "$ELASTIC_STACK_VERSION" = *"-SNAPSHOT" ]]; then + cd /tmp + wget https://snapshots.elastic.co/docker/logstash-"$ELASTIC_STACK_VERSION".tar.gz + tar xfvz logstash-"$ELASTIC_STACK_VERSION".tar.gz repositories + echo "Loading docker image: " + cat repositories + docker load < logstash-"$ELASTIC_STACK_VERSION".tar.gz + rm logstash-"$ELASTIC_STACK_VERSION".tar.gz + cd - + fi + + if [ -f Gemfile.lock ]; then + rm Gemfile.lock + fi + + docker-compose -f ci/unit/docker-compose.yml down + docker-compose -f ci/unit/docker-compose.yml up --no-start --build --force-recreate logstash +else + echo "Please set the ELASTIC_STACK_VERSION environment variable" + echo "For example: export ELASTIC_STACK_VERSION=6.2.4" + exit 1 +fi diff --git a/ci/unit/run.sh b/ci/unit/run.sh new file mode 100755 index 0000000..91e54bb --- /dev/null +++ b/ci/unit/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# This is intended to be run inside the docker container as the command of the docker-compose. +set -ex + +bundle exec rspec -fd --pattern spec/**/*_spec.rb,spec/**/*_specs.rb diff --git a/lib/filewatch/bootstrap.rb b/lib/filewatch/bootstrap.rb index 99c196d..51e9b37 100644 --- a/lib/filewatch/bootstrap.rb +++ b/lib/filewatch/bootstrap.rb @@ -1,7 +1,5 @@ # encoding: utf-8 -require "rbconfig" require "pathname" -# require "logstash/environment" ## Common setup # all the required constants and files @@ -13,36 +11,26 @@ module FileWatch # this is used in the read loop e.g. # @opts[:file_chunk_count].times do # where file_chunk_count defaults to this constant - FIXNUM_MAX = (2**(0.size * 8 - 2) - 1) + MAX_ITERATIONS = (2**(0.size * 8 - 2) - 2) / 32768 require_relative "helper" - module WindowsInode - def prepare_inode(path, stat) - fileId = Winhelper.GetWindowsUniqueFileIdentifier(path) - [fileId, 0, 0] # dev_* doesn't make sense on Windows - end - end - - module UnixInode - def prepare_inode(path, stat) - [stat.ino.to_s, stat.dev_major, stat.dev_minor] - end - end - - jar_version = Pathname.new(__FILE__).dirname.join("../../JAR_VERSION").realpath.read.strip - + gem_root_dir = Pathname.new(__FILE__).dirname.join("../../").realpath + jar_version = gem_root_dir.join("JAR_VERSION").read.strip + fullpath = gem_root_dir.join("lib/jars/filewatch-#{jar_version}.jar").expand_path.to_path require "java" - require_relative "../../lib/jars/filewatch-#{jar_version}.jar" + require fullpath require "jruby_file_watch" if LogStash::Environment.windows? 
require_relative "winhelper" + require_relative "stat/windows_path" + PathStatClass = Stat::WindowsPath FileOpener = FileExt - InodeMixin = WindowsInode else + require_relative "stat/generic" + PathStatClass = Stat::Generic FileOpener = ::File - InodeMixin = UnixInode end # Structs can be used as hash keys because they compare by value diff --git a/lib/filewatch/discoverer.rb b/lib/filewatch/discoverer.rb index b98f77d..f73b1a3 100644 --- a/lib/filewatch/discoverer.rb +++ b/lib/filewatch/discoverer.rb @@ -10,8 +10,8 @@ class Discoverer include LogStash::Util::Loggable def initialize(watched_files_collection, sincedb_collection, settings) - @watching = [] - @exclude = [] + @watching = Concurrent::Array.new + @exclude = Concurrent::Array.new @watched_files_collection = watched_files_collection @sincedb_collection = sincedb_collection @settings = settings @@ -21,13 +21,13 @@ def initialize(watched_files_collection, sincedb_collection, settings) def add_path(path) return if @watching.member?(path) @watching << path - discover_files(path) + discover_files_new_path(path) self end def discover @watching.each do |path| - discover_files(path) + discover_files_ongoing(path) end end @@ -37,7 +37,7 @@ def can_exclude?(watched_file, new_discovery) @exclude.each do |pattern| if watched_file.pathname.fnmatch?(pattern) if new_discovery - logger.debug("Discoverer can_exclude?: #{watched_file.path}: skipping " + + logger.trace("Discoverer can_exclude?: #{watched_file.path}: skipping " + "because it matches exclude #{pattern}") end watched_file.unwatch @@ -47,45 +47,52 @@ def can_exclude?(watched_file, new_discovery) false end - def discover_files(path) - globbed = Dir.glob(path) - globbed = [path] if globbed.empty? - logger.debug("Discoverer found files, count: #{globbed.size}") - globbed.each do |file| - logger.debug("Discoverer found file, path: #{file}") + def discover_files_new_path(path) + discover_any_files(path, false) + end + + def discover_files_ongoing(path) + discover_any_files(path, true) + end + + def discover_any_files(path, ongoing) + fileset = Dir.glob(path).select{|f| File.file?(f) && !File.symlink?(f)} + logger.trace("discover_files", "count" => fileset.size) + fileset.each do |file| pathname = Pathname.new(file) - next unless pathname.file? - next if pathname.symlink? new_discovery = false watched_file = @watched_files_collection.watched_file_by_path(file) if watched_file.nil? - logger.debug("Discoverer discover_files: #{path}: new: #{file} (exclude is #{@exclude.inspect})") new_discovery = true - watched_file = WatchedFile.new(pathname, pathname.stat, @settings) + watched_file = WatchedFile.new(pathname, PathStatClass.new(pathname), @settings) end # if it already unwatched or its excluded then we can skip next if watched_file.unwatched? || can_exclude?(watched_file, new_discovery) + logger.trace("discover_files handling:", "new discovery"=> new_discovery, "watched_file details" => watched_file.details) + if new_discovery - if watched_file.file_ignorable? - logger.debug("Discoverer discover_files: #{file}: skipping because it was last modified more than #{@settings.ignore_older} seconds ago") - # on discovery ignorable watched_files are put into the ignored state and that - # updates the size from the internal stat - # so the existing contents are not read. 
- # because, normally, a newly discovered file will - # have a watched_file size of zero - # they are still added to the collection so we know they are there for the next periodic discovery - watched_file.ignore - end - # now add the discovered file to the watched_files collection and adjust the sincedb collections - @watched_files_collection.add(watched_file) + watched_file.initial_completed if ongoing # initially when the sincedb collection is filled with records from the persistence file # each value is not associated with a watched file # a sincedb_value can be: # unassociated # associated with this watched_file # associated with a different watched_file - @sincedb_collection.associate(watched_file) + if @sincedb_collection.associate(watched_file) + if watched_file.file_ignorable? + logger.trace("Discoverer discover_files: #{file}: skipping because it was last modified more than #{@settings.ignore_older} seconds ago") + # on discovery ignorable watched_files are put into the ignored state and that + # updates the size from the internal stat + # so the existing contents are not read. + # because, normally, a newly discovered file will + # have a watched_file size of zero + # they are still added to the collection so we know they are there for the next periodic discovery + watched_file.ignore_as_unread + end + # now add the discovered file to the watched_files collection and adjust the sincedb collections + @watched_files_collection.add(watched_file) + end end # at this point the watched file is created, is in the db but not yet opened or being processed end diff --git a/lib/filewatch/observing_base.rb b/lib/filewatch/observing_base.rb index 387ba77..7f9be79 100644 --- a/lib/filewatch/observing_base.rb +++ b/lib/filewatch/observing_base.rb @@ -44,7 +44,8 @@ def initialize(opts={}) :exclude => [], :start_new_files_at => :end, :delimiter => "\n", - :file_chunk_count => FIXNUM_MAX, + :file_chunk_count => MAX_ITERATIONS, + :file_chunk_size => FILE_READ_SIZE, :file_sort_by => "last_modified", :file_sort_direction => "asc", }.merge(opts) diff --git a/lib/filewatch/read_mode/handlers/base.rb b/lib/filewatch/read_mode/handlers/base.rb index 8146808..6a086d8 100644 --- a/lib/filewatch/read_mode/handlers/base.rb +++ b/lib/filewatch/read_mode/handlers/base.rb @@ -19,7 +19,7 @@ def quit? end def handle(watched_file) - logger.debug("handling: #{watched_file.path}") + logger.trace("handling: #{watched_file.path}") unless watched_file.has_listener? watched_file.set_listener(@observer) end @@ -34,7 +34,7 @@ def handle_specifically(watched_file) def open_file(watched_file) return true if watched_file.file_open? - logger.debug("opening #{watched_file.path}") + logger.trace("opening #{watched_file.path}") begin watched_file.open rescue @@ -46,7 +46,7 @@ def open_file(watched_file) logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}") watched_file.last_open_warning_at = now else - logger.debug("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}") + logger.trace("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}") end watched_file.watch # set it back to watch so we can try it again end @@ -65,13 +65,26 @@ def add_or_update_sincedb_collection(watched_file) elsif sincedb_value.watched_file == watched_file update_existing_sincedb_collection_value(watched_file, sincedb_value) else - logger.warn? 
&& logger.warn("mismatch on sincedb_value.watched_file, this should have been handled by Discoverer") + msg = "add_or_update_sincedb_collection: the found sincedb_value has a watched_file - this is a rename, switching inode to this watched file" + logger.trace(msg) + existing_watched_file = sincedb_value.watched_file + if existing_watched_file.nil? + sincedb_value.set_watched_file(watched_file) + logger.trace("add_or_update_sincedb_collection: switching as new file") + watched_file.rotate_as_file + watched_file.update_bytes_read(sincedb_value.position) + else + sincedb_value.set_watched_file(watched_file) + logger.trace("add_or_update_sincedb_collection: switching from...", "watched_file details" => watched_file.details) + watched_file.rotate_from(existing_watched_file) + end + end watched_file.initial_completed end def update_existing_sincedb_collection_value(watched_file, sincedb_value) - logger.debug("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}") + logger.trace("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}") # sincedb_value is the source of truth watched_file.update_bytes_read(sincedb_value.position) end @@ -79,7 +92,7 @@ def update_existing_sincedb_collection_value(watched_file, sincedb_value) def add_new_value_sincedb_collection(watched_file) sincedb_value = SincedbValue.new(0) sincedb_value.set_watched_file(watched_file) - logger.debug("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position) + logger.trace("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position) sincedb_collection.set(watched_file.sincedb_key, sincedb_value) end end diff --git a/lib/filewatch/read_mode/handlers/read_file.rb b/lib/filewatch/read_mode/handlers/read_file.rb index 818e0f7..9219792 100644 --- a/lib/filewatch/read_mode/handlers/read_file.rb +++ b/lib/filewatch/read_mode/handlers/read_file.rb @@ -5,20 +5,21 @@ class ReadFile < Base def handle_specifically(watched_file) if open_file(watched_file) add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key) - @settings.file_chunk_count.times do + changed = false + logger.trace("reading...", "amount" => watched_file.read_bytesize_description, "filename" => watched_file.filename) + watched_file.read_loop_count.times do break if quit? begin - data = watched_file.file_read(@settings.file_chunk_size) - result = watched_file.buffer_extract(data) # expect BufferExtractResult - logger.info(result.warning, result.additional) unless result.warning.empty? + # expect BufferExtractResult + result = watched_file.read_extract_lines + # read_extract_lines will increment bytes_read + logger.trace(result.warning, result.additional) unless result.warning.empty? + changed = true result.lines.each do |line| watched_file.listener.accept(line) # sincedb position is independent from the watched_file bytes_read sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size) end - # instead of tracking the bytes_read line by line we need to track by the data read size. 
- # because we initially seek to the bytes_read not the sincedb position - watched_file.increment_bytes_read(data.bytesize) sincedb_collection.request_disk_flush rescue EOFError # flush the buffer now in case there is no final delimiter @@ -26,8 +27,9 @@ def handle_specifically(watched_file) watched_file.listener.accept(line) unless line.empty? watched_file.listener.eof watched_file.file_close - # unset_watched_file will set sincedb_value.position to be watched_file.bytes_read - sincedb_collection.unset_watched_file(watched_file) + key = watched_file.sincedb_key + sincedb_collection.reading_completed(key) + sincedb_collection.clear_watched_file(key) watched_file.listener.deleted watched_file.unwatch break @@ -35,7 +37,7 @@ def handle_specifically(watched_file) watched_file.listener.error break rescue => e - logger.error("read_to_eof: general error reading #{watched_file.path} - error: #{e.inspect}") + logger.error("read_to_eof: general error reading file", "path" => watched_file.path, "error" => e.inspect, "backtrace" => e.backtrace.take(8)) watched_file.listener.error break end diff --git a/lib/filewatch/read_mode/handlers/read_zip_file.rb b/lib/filewatch/read_mode/handlers/read_zip_file.rb index a0d7fb8..3a0f764 100644 --- a/lib/filewatch/read_mode/handlers/read_zip_file.rb +++ b/lib/filewatch/read_mode/handlers/read_zip_file.rb @@ -13,6 +13,11 @@ def handle_specifically(watched_file) add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key) # can't really stripe read a zip file, its all or nothing. watched_file.listener.opened + # what do we do about quit when we have just begun reading the zipped file (e.g. pipeline reloading) + # should we track lines read in the sincedb and + # fast forward through the lines until we reach unseen content? + # meaning that we can quit in the middle of a zip file + key = watched_file.sincedb_key begin file_stream = FileInputStream.new(watched_file.path) gzip_stream = GZIPInputStream.new(file_stream) @@ -31,8 +36,8 @@ def handle_specifically(watched_file) logger.error("Cannot decompress the gzip file at path: #{watched_file.path}") watched_file.listener.error else - watched_file.update_bytes_read(watched_file.last_stat_size) - sincedb_collection.unset_watched_file(watched_file) + sincedb_collection.store_last_read(key, watched_file.last_stat_size) + sincedb_collection.request_disk_flush watched_file.listener.deleted watched_file.unwatch ensure @@ -42,7 +47,7 @@ def handle_specifically(watched_file) close_and_ignore_ioexception(gzip_stream) unless gzip_stream.nil? close_and_ignore_ioexception(file_stream) unless file_stream.nil? end - sincedb_collection.unset_watched_file(watched_file) + sincedb_collection.clear_watched_file(key) end private diff --git a/lib/filewatch/read_mode/processor.rb b/lib/filewatch/read_mode/processor.rb index 90ca05b..88e8505 100644 --- a/lib/filewatch/read_mode/processor.rb +++ b/lib/filewatch/read_mode/processor.rb @@ -39,16 +39,16 @@ def read_zip_file(watched_file) @read_zip_file.handle(watched_file) end - def process_closed(watched_files) - # do not process watched_files in the closed state. + def process_all_states(watched_files) + process_watched(watched_files) + return if watch.quit? + process_active(watched_files) end - def process_ignored(watched_files) - # do not process watched_files in the ignored state. 
- end + private def process_watched(watched_files) - logger.debug("Watched processing") + logger.trace("Watched processing") # Handles watched_files in the watched state. # for a slice of them: # move to the active state @@ -81,7 +81,7 @@ def process_watched(watched_files) end def process_active(watched_files) - logger.debug("Active processing") + logger.trace("Active processing") # Handles watched_files in the active state. watched_files.select {|wf| wf.active? }.each do |watched_file| path = watched_file.path @@ -109,7 +109,7 @@ def common_deleted_reaction(watched_file, action) # file has gone away or we can't read it anymore. watched_file.unwatch deletable_filepaths << watched_file.path - logger.debug("#{action} - stat failed: #{watched_file.path}, removing from collection") + logger.trace("#{action} - stat failed: #{watched_file.path}, removing from collection") end def common_error_reaction(path, error, action) diff --git a/lib/filewatch/settings.rb b/lib/filewatch/settings.rb index 5d7b61f..1ec08e9 100644 --- a/lib/filewatch/settings.rb +++ b/lib/filewatch/settings.rb @@ -22,7 +22,7 @@ def initialize :delimiter => "\n", :file_chunk_size => FILE_READ_SIZE, :max_active => 4095, - :file_chunk_count => FIXNUM_MAX, + :file_chunk_count => MAX_ITERATIONS, :sincedb_clean_after => 14, :exclude => [], :stat_interval => 1, diff --git a/lib/filewatch/sincedb_collection.rb b/lib/filewatch/sincedb_collection.rb index 90b59b0..78c4e12 100644 --- a/lib/filewatch/sincedb_collection.rb +++ b/lib/filewatch/sincedb_collection.rb @@ -39,7 +39,7 @@ def write_if_requested end def write(reason=nil) - logger.debug("caller requested sincedb write (#{reason})") + logger.trace("caller requested sincedb write (#{reason})") sincedb_write end @@ -47,73 +47,72 @@ def open @time_sdb_opened = Time.now.to_f begin path.open do |file| - logger.debug("open: reading from #{path}") + logger.trace("open: reading from #{path}") @serializer.deserialize(file) do |key, value| - logger.debug("open: importing ... '#{key}' => '#{value}'") + logger.trace("open: importing ... '#{key}' => '#{value}'") set_key_value(key, value) end end - logger.debug("open: count of keys read: #{@sincedb.keys.size}") + logger.trace("open: count of keys read: #{@sincedb.keys.size}") rescue => e #No existing sincedb to load - logger.debug("open: error: #{path}: #{e.inspect}") + logger.trace("open: error: #{path}: #{e.inspect}") end end def associate(watched_file) - logger.debug("associate: finding: #{watched_file.path}") + logger.trace("associate: finding", "inode" => watched_file.sincedb_key.inode, "path" => watched_file.path) sincedb_value = find(watched_file) if sincedb_value.nil? # sincedb has no record of this inode # and due to the window handling of many files # this file may not be opened in this session. # a new value will be added when the file is opened - return + logger.trace("associate: unmatched") + return true end + logger.trace("associate: found sincedb record", "filename" => watched_file.filename, "sincedb key" => watched_file.sincedb_key,"sincedb_value" => sincedb_value) if sincedb_value.watched_file.nil? # not associated if sincedb_value.path_in_sincedb.nil? - # old v1 record, assume its the same file handle_association(sincedb_value, watched_file) - return + logger.trace("associate: inode matched but no path in sincedb") + return true end if sincedb_value.path_in_sincedb == watched_file.path # the path on disk is the same as discovered path # and the inode is the same. 
handle_association(sincedb_value, watched_file) - return + logger.trace("associate: inode and path matched") + return true end # the path on disk is different from discovered unassociated path # but they have the same key (inode) # treat as a new file, a new value will be added when the file is opened - logger.debug("associate: matched but allocated to another - #{sincedb_value}") sincedb_value.clear_watched_file delete(watched_file.sincedb_key) - return + logger.trace("associate: matched but allocated to another") + return true end if sincedb_value.watched_file.equal?(watched_file) # pointer equals - logger.debug("associate: already associated - #{sincedb_value}, for path: #{watched_file.path}") - return + logger.trace("associate: already associated") + return true end - # sincedb_value.watched_file is not the discovered watched_file but they have the same key (inode) - # this means that the filename was changed during this session. - # logout the history of the old sincedb_value and remove it - # a new value will be added when the file is opened - # TODO notify about done-ness of old sincedb_value and watched_file - old_watched_file = sincedb_value.watched_file - sincedb_value.clear_watched_file - if logger.debug? - logger.debug("associate: matched but allocated to another - #{sincedb_value}") - logger.debug("associate: matched but allocated to another - old watched_file history - #{old_watched_file.recent_state_history.join(', ')}") - logger.debug("associate: matched but allocated to another - DELETING value at key `#{old_watched_file.sincedb_key}`") - end - delete(old_watched_file.sincedb_key) + # sincedb_value.watched_file is not this discovered watched_file but they have the same key (inode) + # this means that the filename path was changed during this session. + # renamed file can be discovered... + # before the original is detected as deleted: state is `active` + # after the original is detected as deleted but before it is actually deleted: state is `delayed_delete` + # after the original is deleted + # are not yet in the delete phase, let this play out + existing_watched_file = sincedb_value.watched_file + logger.trace("----------------- >> associate: the found sincedb_value has a watched_file - this is a rename", "this watched_file details" => watched_file.details, "other watched_file details" => existing_watched_file.details) + watched_file.rotation_in_progress + true end def find(watched_file) - get(watched_file.sincedb_key).tap do |obj| - logger.debug("find for path: #{watched_file.path}, found: '#{!obj.nil?}'") - end + get(watched_file.sincedb_key) end def member?(key) @@ -124,6 +123,11 @@ def get(key) @sincedb[key] end + def set(key, value) + @sincedb[key] = value + value + end + def delete(key) @sincedb.delete(key) end @@ -144,11 +148,23 @@ def set_watched_file(key, watched_file) @sincedb[key].set_watched_file(watched_file) end - def unset_watched_file(watched_file) + def watched_file_deleted(watched_file) return unless member?(watched_file.sincedb_key) get(watched_file.sincedb_key).unset_watched_file end + def store_last_read(key, pos) + @sincedb[key].update_position(pos) + end + + def clear_watched_file(key) + @sincedb[key].clear_watched_file + end + + def reading_completed(key) + @sincedb[key].reading_completed + end + def clear @sincedb.clear end @@ -157,11 +173,6 @@ def keys @sincedb.keys end - def set(key, value) - @sincedb[key] = value - value - end - def watched_file_unset?(key) return false unless member?(key) get(key).watched_file.nil? 
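The sincedb changes above and the rotation handling below rely on one property: the sincedb is keyed by file identity (an InodeStruct of inode plus device numbers), not by path. A rename changes the path but not the inode, so a rotated file can be re-associated with its previous read position. A minimal sketch of that idea follows, under simplified assumptions: the `Record` struct, `table` hash and `key_for` helper are hypothetical illustrations, not the plugin's API (the real value type is SincedbValue).

    # Ruby Structs compare by value (hash/eql? are value-based),
    # so an inode-keyed Struct works as a Hash key.
    InodeStruct = Struct.new(:inode, :dev_major, :dev_minor)
    Record = Struct.new(:position, :path) # simplified stand-in for SincedbValue

    def key_for(path)
      s = File.stat(path)
      InodeStruct.new(s.ino.to_s, s.dev_major, s.dev_minor)
    end

    File.write("app.log", "x" * 2048)   # example file so the stat calls succeed
    table = {}
    table[key_for("app.log")] = Record.new(1024, "app.log")

    # A rotation rename changes the path but not the inode, so the same key
    # still finds the old read position:
    File.rename("app.log", "app.log.1")
    table[key_for("app.log.1")] # => #<struct Record position=1024, path="app.log">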
@@ -182,33 +193,36 @@ def handle_association(sincedb_value, watched_file) watched_file.update_bytes_read(sincedb_value.position) sincedb_value.set_watched_file(watched_file) watched_file.initial_completed - watched_file.ignore if watched_file.all_read? + if watched_file.all_read? + watched_file.ignore + logger.trace("handle_association fully read, ignoring.....", "watched file" => watched_file.details, "sincedb value" => sincedb_value) + end end def set_key_value(key, value) if @time_sdb_opened < value.last_changed_at_expires(@settings.sincedb_expiry_duration) - logger.debug("open: setting #{key.inspect} to #{value.inspect}") + logger.trace("open: setting #{key.inspect} to #{value.inspect}") set(key, value) else - logger.debug("open: record has expired, skipping: #{key.inspect} #{value.inspect}") + logger.trace("open: record has expired, skipping: #{key.inspect} #{value.inspect}") end end def sincedb_write(time = Time.now.to_i) - logger.debug("sincedb_write: to: #{path}") + logger.trace("sincedb_write: to: #{path}") begin @write_method.call @serializer.expired_keys.each do |key| @sincedb[key].unset_watched_file delete(key) - logger.debug("sincedb_write: cleaned", "key" => "'#{key}'") + logger.trace("sincedb_write: cleaned", "key" => "'#{key}'") end @sincedb_last_write = time @write_requested = false rescue Errno::EACCES # no file handles free perhaps # maybe it will work next time - logger.debug("sincedb_write: error: #{path}: #{$!}") + logger.trace("sincedb_write: error: #{path}: #{$!}") end end diff --git a/lib/filewatch/sincedb_value.rb b/lib/filewatch/sincedb_value.rb index f97c44a..56ac484 100644 --- a/lib/filewatch/sincedb_value.rb +++ b/lib/filewatch/sincedb_value.rb @@ -66,6 +66,12 @@ def clear_watched_file @watched_file = nil end + def reading_completed + touch + @path_in_sincedb = @watched_file.path + @position = @watched_file.bytes_read + end + def unset_watched_file # called in read mode only because we flushed any remaining bytes as a final line. # cache the position diff --git a/lib/filewatch/stat/generic.rb b/lib/filewatch/stat/generic.rb new file mode 100644 index 0000000..6d83a72 --- /dev/null +++ b/lib/filewatch/stat/generic.rb @@ -0,0 +1,34 @@ +# encoding: utf-8 + +module FileWatch module Stat + class Generic + + attr_reader :identifier, :inode, :modified_at, :size, :inode_struct + + def initialize(source) + @source = source + @identifier = nil + restat + end + + def add_identifier(identifier) self; end + + def restat + @inner_stat = @source.stat + @inode = @inner_stat.ino.to_s + @modified_at = @inner_stat.mtime.to_f + @size = @inner_stat.size + @dev_major = @inner_stat.dev_major + @dev_minor = @inner_stat.dev_minor + @inode_struct = InodeStruct.new(@inode, @dev_major, @dev_minor) + end + + def windows? 
+ false + end + + def inspect + "<Generic size='#{@size}' modified_at='#{@modified_at}' inode='#{@inode}' inode_struct='#{@inode_struct}'>" + end + end +end end diff --git a/lib/filewatch/stat/windows_path.rb b/lib/filewatch/stat/windows_path.rb new file mode 100644 index 0000000..de773a4 --- /dev/null +++ b/lib/filewatch/stat/windows_path.rb @@ -0,0 +1,32 @@ +# encoding: utf-8 + +module FileWatch module Stat + class WindowsPath + + attr_reader :identifier, :inode, :modified_at, :size, :inode_struct + + def initialize(source) + @source = source + @inode = Winhelper.identifier_from_path(@source.to_path) + @dev_major = 0 + @dev_minor = 0 + # in windows the dev hi and low are in the identifier + @inode_struct = InodeStruct.new(@inode, @dev_major, @dev_minor) + restat + end + + def restat + @inner_stat = @source.stat + @modified_at = @inner_stat.mtime.to_f + @size = @inner_stat.size + end + + def windows? + true + end + + def inspect + "<WindowsPath size='#{@size}' modified_at='#{@modified_at}' inode='#{@inode}' inode_struct='#{@inode_struct}'>" + end + end +end end diff --git a/lib/filewatch/tail_mode/handlers/base.rb b/lib/filewatch/tail_mode/handlers/base.rb index a769f5a..783eb4e 100644 --- a/lib/filewatch/tail_mode/handlers/base.rb +++ b/lib/filewatch/tail_mode/handlers/base.rb @@ -13,7 +13,7 @@ def initialize(sincedb_collection, observer, settings) end def handle(watched_file) - logger.debug("handling: #{watched_file.path}") + logger.trace("handling: #{watched_file.filename}") unless watched_file.has_listener? watched_file.set_listener(@observer) end @@ -31,6 +31,7 @@ def update_existing_specifically(watched_file, sincedb_value) private def read_to_eof(watched_file) + logger.trace("reading...", "amount" => watched_file.read_bytesize_description, "filename" => watched_file.filename) changed = false # from a real config (has 102 file inputs) # -- This cfg creates a file input for every log file to create a dedicated file pointer and read all files simultaneously # -- If we put all log files in one file input glob we will have sequential reading of each file # we enable the pseudo parallel processing of each file. # user also has the option to specify a low `stat_interval` and a very high `discover_interval` to respond # quicker to changing files and not allowing too much content to build up before reading it. - @settings.file_chunk_count.times do + watched_file.read_loop_count.times do begin - data = watched_file.file_read(@settings.file_chunk_size) - result = watched_file.buffer_extract(data) # expect BufferExtractResult - logger.info(result.warning, result.additional) unless result.warning.empty? + result = watched_file.read_extract_lines # expect BufferExtractResult + logger.trace(result.warning, result.additional) unless result.warning.empty? changed = true result.lines.each do |line| watched_file.listener.accept(line) # sincedb position is now independent from the watched_file bytes_read sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size) end - # instead of tracking the bytes_read line by line we need to track by the data read size. - # because we seek to the bytes_read not the sincedb position - watched_file.increment_bytes_read(data.bytesize) rescue EOFError # it only makes sense to signal EOF in "read" mode not "tail" break @@ -70,7 +67,7 @@ def read_to_eof(watched_file) def open_file(watched_file) return true if watched_file.file_open?
- logger.debug("opening #{watched_file.path}") + logger.trace("opening #{watched_file.filename}") begin watched_file.open rescue @@ -82,43 +79,64 @@ def open_file(watched_file) logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}") watched_file.last_open_warning_at = now else - logger.debug("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}") + logger.trace("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}") end watched_file.watch # set it back to watch so we can try it again - end - if watched_file.file_open? - watched_file.listener.opened - true else - false + watched_file.listener.opened end + watched_file.file_open? end def add_or_update_sincedb_collection(watched_file) sincedb_value = @sincedb_collection.find(watched_file) if sincedb_value.nil? - add_new_value_sincedb_collection(watched_file) + sincedb_value = add_new_value_sincedb_collection(watched_file) + watched_file.initial_completed elsif sincedb_value.watched_file == watched_file update_existing_sincedb_collection_value(watched_file, sincedb_value) + watched_file.initial_completed else - logger.warn? && logger.warn("mismatch on sincedb_value.watched_file, this should have been handled by Discoverer") + msg = "add_or_update_sincedb_collection: found sincedb record" + logger.trace(msg, + "sincedb key" => watched_file.sincedb_key, + "sincedb value" => sincedb_value + ) + # detected a rotation, Discoverer can't handle this because this watched file is not a new discovery. + # we must handle it here, by transferring state and have the sincedb value track this watched file + # rotate_as_file and rotate_from will switch the sincedb key to the inode that the path is now pointing to + # and pickup the sincedb_value from before. + msg = "add_or_update_sincedb_collection: the found sincedb_value has a watched_file - this is a rename, switching inode to this watched file" + logger.trace(msg) + existing_watched_file = sincedb_value.watched_file + if existing_watched_file.nil? + sincedb_value.set_watched_file(watched_file) + logger.trace("add_or_update_sincedb_collection: switching as new file") + watched_file.rotate_as_file + watched_file.update_bytes_read(sincedb_value.position) + else + sincedb_value.set_watched_file(watched_file) + logger.trace("add_or_update_sincedb_collection: switching from...", "watched_file details" => watched_file.details) + watched_file.rotate_from(existing_watched_file) + end end - watched_file.initial_completed + sincedb_value end def update_existing_sincedb_collection_value(watched_file, sincedb_value) - logger.debug("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}") + logger.trace("update_existing_sincedb_collection_value: #{watched_file.filename}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}") update_existing_specifically(watched_file, sincedb_value) end def add_new_value_sincedb_collection(watched_file) sincedb_value = get_new_value_specifically(watched_file) - logger.debug("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position) + logger.trace("add_new_value_sincedb_collection", "position" => sincedb_value.position, "watched_file details" => watched_file.details) sincedb_collection.set(watched_file.sincedb_key, sincedb_value) + sincedb_value end def get_new_value_specifically(watched_file) - position = @settings.start_new_files_at == :beginning ? 
0 : watched_file.last_stat_size + position = watched_file.position_for_new_sincedb_value value = SincedbValue.new(position) value.set_watched_file(watched_file) watched_file.update_bytes_read(position) diff --git a/lib/filewatch/tail_mode/handlers/create.rb b/lib/filewatch/tail_mode/handlers/create.rb index 2b89c11..167bc2c 100644 --- a/lib/filewatch/tail_mode/handlers/create.rb +++ b/lib/filewatch/tail_mode/handlers/create.rb @@ -10,8 +10,7 @@ def handle_specifically(watched_file) def update_existing_specifically(watched_file, sincedb_value) # sincedb_value is the source of truth - position = sincedb_value.position - watched_file.update_bytes_read(position) + watched_file.update_bytes_read(sincedb_value.position) end end end end end diff --git a/lib/filewatch/tail_mode/handlers/create_initial.rb b/lib/filewatch/tail_mode/handlers/create_initial.rb index 65c385b..8e01f9d 100644 --- a/lib/filewatch/tail_mode/handlers/create_initial.rb +++ b/lib/filewatch/tail_mode/handlers/create_initial.rb @@ -4,6 +4,7 @@ module FileWatch module TailMode module Handlers class CreateInitial < Base def handle_specifically(watched_file) if open_file(watched_file) + logger.trace("handle_specifically opened file handle: #{watched_file.file.fileno}, path: #{watched_file.filename}") add_or_update_sincedb_collection(watched_file) end end @@ -13,7 +14,7 @@ def update_existing_specifically(watched_file, sincedb_value) if @settings.start_new_files_at == :beginning position = 0 end - logger.debug("update_existing_specifically - #{watched_file.path}: seeking to #{position}") + logger.trace("update_existing_specifically - #{watched_file.path}: seeking to #{position}") watched_file.update_bytes_read(position) sincedb_value.update_position(position) end diff --git a/lib/filewatch/tail_mode/handlers/delete.rb b/lib/filewatch/tail_mode/handlers/delete.rb index 39e4c74..cc7a79b 100644 --- a/lib/filewatch/tail_mode/handlers/delete.rb +++ b/lib/filewatch/tail_mode/handlers/delete.rb @@ -2,9 +2,21 @@ module FileWatch module TailMode module Handlers class Delete < Base + DATA_LOSS_WARNING = "watched file path was deleted or rotated before all content was read, if the file is found again it will be read from the last position" def handle_specifically(watched_file) + # TODO consider trying to find the renamed file - it will have the same inode. + # Needs a rotate scheme rename hint from user e.g. "-YYYY-MM-DD-N." 
or "..N" + # send the found content to the same listener (stream identity) + logger.trace("info", + "watched_file details" => watched_file.details, + "path" => watched_file.path) + if watched_file.bytes_unread > 0 + logger.warn(DATA_LOSS_WARNING, "unread_bytes" => watched_file.bytes_unread, "path" => watched_file.path) + end watched_file.listener.deleted - sincedb_collection.unset_watched_file(watched_file) + # no need to worry about data in the buffer + # if found it will be associated by inode and read from last position + sincedb_collection.watched_file_deleted(watched_file) watched_file.file_close end end diff --git a/lib/filewatch/tail_mode/handlers/grow.rb b/lib/filewatch/tail_mode/handlers/grow.rb index 826017e..4c93fe9 100644 --- a/lib/filewatch/tail_mode/handlers/grow.rb +++ b/lib/filewatch/tail_mode/handlers/grow.rb @@ -4,7 +4,6 @@ module FileWatch module TailMode module Handlers class Grow < Base def handle_specifically(watched_file) watched_file.file_seek(watched_file.bytes_read) - logger.debug("reading to eof: #{watched_file.path}") read_to_eof(watched_file) end end diff --git a/lib/filewatch/tail_mode/handlers/shrink.rb b/lib/filewatch/tail_mode/handlers/shrink.rb index 9a7f0f0..54fc3ed 100644 --- a/lib/filewatch/tail_mode/handlers/shrink.rb +++ b/lib/filewatch/tail_mode/handlers/shrink.rb @@ -3,18 +3,18 @@ module FileWatch module TailMode module Handlers class Shrink < Base def handle_specifically(watched_file) - add_or_update_sincedb_collection(watched_file) + sdbv = add_or_update_sincedb_collection(watched_file) watched_file.file_seek(watched_file.bytes_read) - logger.debug("reading to eof: #{watched_file.path}") read_to_eof(watched_file) + logger.trace("handle_specifically: after read_to_eof", "watched file" => watched_file.details, "sincedb value" => sdbv) end def update_existing_specifically(watched_file, sincedb_value) # we have a match but size is smaller # set all to zero - logger.debug("update_existing_specifically: #{watched_file.path}: was truncated seeking to beginning") - watched_file.update_bytes_read(0) if watched_file.bytes_read != 0 + watched_file.reset_bytes_unread sincedb_value.update_position(0) + logger.trace("update_existing_specifically: was truncated seeking to beginning", "watched file" => watched_file.details, "sincedb value" => sincedb_value) end end end end end diff --git a/lib/filewatch/tail_mode/handlers/unignore.rb b/lib/filewatch/tail_mode/handlers/unignore.rb index 07cace1..7b510fe 100644 --- a/lib/filewatch/tail_mode/handlers/unignore.rb +++ b/lib/filewatch/tail_mode/handlers/unignore.rb @@ -6,15 +6,16 @@ class Unignore < Base # before any other handling has been done # at a minimum we create or associate a sincedb value def handle_specifically(watched_file) - add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key) + add_or_update_sincedb_collection(watched_file) end def get_new_value_specifically(watched_file) # for file initially ignored their bytes_read was set to stat.size # use this value not the `start_new_files_at` for the position - # logger.debug("get_new_value_specifically", "watched_file" => watched_file.inspect) + # logger.trace("get_new_value_specifically", "watched_file" => watched_file.inspect) SincedbValue.new(watched_file.bytes_read).tap do |val| val.set_watched_file(watched_file) + logger.trace("-------------------- >>>>> get_new_value_specifically: unignore", "watched file" => watched_file.details, "sincedb value" => val) end end @@ -25,6 +26,7 @@ def 
update_existing_specifically(watched_file, sincedb_value) # we will handle grow or shrink # for now we seek to where we were before the file got ignored (grow) # or to the start (shrink) + logger.trace("-------------------- >>>>> update_existing_specifically: unignore", "watched file" => watched_file.details, "sincedb value" => sincedb_value) position = 0 if watched_file.shrunk? watched_file.update_bytes_read(0) diff --git a/lib/filewatch/tail_mode/processor.rb b/lib/filewatch/tail_mode/processor.rb index 8291280..ed990c7 100644 --- a/lib/filewatch/tail_mode/processor.rb +++ b/lib/filewatch/tail_mode/processor.rb @@ -34,6 +34,7 @@ def add_watch(watch) end def initialize_handlers(sincedb_collection, observer) + @sincedb_collection = sincedb_collection @create_initial = Handlers::CreateInitial.new(sincedb_collection, observer, @settings) @create = Handlers::Create.new(sincedb_collection, observer, @settings) @grow = Handlers::Grow.new(sincedb_collection, observer, @settings) @@ -71,80 +72,148 @@ def unignore(watched_file) @unignore.handle(watched_file) end + def process_all_states(watched_files) + process_closed(watched_files) + return if watch.quit? + process_ignored(watched_files) + return if watch.quit? + process_delayed_delete(watched_files) + return if watch.quit? + process_restat_for_watched_and_active(watched_files) + return if watch.quit? + process_rotation_in_progress(watched_files) + return if watch.quit? + process_watched(watched_files) + return if watch.quit? + process_active(watched_files) + end + + private + def process_closed(watched_files) - logger.debug("Closed processing") + # logger.trace("Closed processing") # Handles watched_files in the closed state. # if its size changed it is put into the watched state watched_files.select {|wf| wf.closed? }.each do |watched_file| - path = watched_file.path - begin - watched_file.restat + common_restat_with_delay(watched_file, "Closed") do + # it won't do this if rotation is detected if watched_file.size_changed? # if the closed file changed, move it to the watched state # not to active state because we want to respect the active files window. watched_file.watch end - rescue Errno::ENOENT - # file has gone away or we can't read it anymore. - common_deleted_reaction(watched_file, "Closed") - rescue => e - common_error_reaction(path, e, "Closed") end break if watch.quit? end end def process_ignored(watched_files) - logger.debug("Ignored processing") + # logger.trace("Ignored processing") # Handles watched_files in the ignored state. # if its size changed: # put it in the watched state # invoke unignore watched_files.select {|wf| wf.ignored? }.each do |watched_file| - path = watched_file.path - begin - watched_file.restat + common_restat_with_delay(watched_file, "Ignored") do + # it won't do this if rotation is detected if watched_file.size_changed? watched_file.watch unignore(watched_file) end - rescue Errno::ENOENT - # file has gone away or we can't read it anymore. - common_deleted_reaction(watched_file, "Ignored") - rescue => e - common_error_reaction(path, e, "Ignored") end break if watch.quit? 
end end + def process_delayed_delete(watched_files) + # defer the delete to one loop later to ensure that the stat really really can't find a renamed file + # because a `stat` can be called right in the middle of the rotation rename cascade + logger.trace("Delayed Delete processing") + watched_files.select {|wf| wf.delayed_delete?}.each do |watched_file| + logger.trace(">>> Delayed Delete", "path" => watched_file.filename) + common_restat_without_delay(watched_file, ">>> Delayed Delete") do + logger.trace(">>> Delayed Delete: file at path found again", "watched_file" => watched_file.details) + watched_file.file_at_path_found_again + end + end + end + + def process_restat_for_watched_and_active(watched_files) + # do restat on all watched and active states once now. closed and ignored have been handled already + logger.trace("Watched + Active restat processing") + watched_files.select {|wf| wf.watched? || wf.active?}.each do |watched_file| + common_restat_with_delay(watched_file, "Watched") + end + end + + def process_rotation_in_progress(watched_files) + logger.trace("Rotation In Progress processing") + watched_files.select {|wf| wf.rotation_in_progress?}.each do |watched_file| + if !watched_file.all_read? + if watched_file.file_open? + # rotated file but original opened file is not fully read + # we need to keep reading the open file, if we close it we lose it because the path is now pointing at a different file. + logger.trace(">>> Rotation In Progress - inode change detected and original content is not fully read, reading all", "watched_file details" => watched_file.details) + # need to fully read open file while we can + watched_file.set_depth_first_read_loop + grow(watched_file) + watched_file.set_user_defined_read_loop + else + logger.warn(">>> Rotation In Progress - inode change detected and original content is not fully read, file is closed and path points to new content", "watched_file details" => watched_file.details) + end + end + current_key = watched_file.sincedb_key + sdb_value = @sincedb_collection.get(current_key) + potential_key = watched_file.stat_sincedb_key + potential_sdb_value = @sincedb_collection.get(potential_key) + logger.trace(">>> Rotation In Progress", "watched_file" => watched_file.details, "found_sdb_value" => sdb_value, "potential_key" => potential_key, "potential_sdb_value" => potential_sdb_value) + if potential_sdb_value.nil? + if sdb_value.nil? + logger.trace("---------- >>> Rotation In Progress: rotating as initial file, no potential sincedb value AND no found sincedb value") + watched_file.rotate_as_initial_file + else + logger.trace("---------- >>>> Rotation In Progress: rotating as existing file, no potential sincedb value BUT found sincedb value") + watched_file.rotate_as_file + sdb_value.clear_watched_file + end + new_sdb_value = SincedbValue.new(0) + new_sdb_value.set_watched_file(watched_file) + @sincedb_collection.set(potential_key, new_sdb_value) + else + other_watched_file = potential_sdb_value.watched_file + if other_watched_file.nil? + logger.trace("---------- >>>> Rotation In Progress: rotating as existing file WITH potential sincedb value that does not have a watched file reference !!!!!!!!!!!!!!!!!") + watched_file.rotate_as_file(potential_sdb_value.position) + sdb_value.clear_watched_file unless sdb_value.nil? 
+ potential_sdb_value.set_watched_file(watched_file) + else + logger.trace("---------- >>>> Rotation In Progress: rotating from...", "this watched_file details" => watched_file.details, "other watched_file details" => other_watched_file.details) + watched_file.rotate_from(other_watched_file) + sdb_value.clear_watched_file unless sdb_value.nil? + potential_sdb_value.set_watched_file(watched_file) + end + end + logger.trace("---------- >>>> Rotation In Progress: after handling rotation", "this watched_file details" => watched_file.details, "sincedb_value" => (potential_sdb_value || sdb_value)) + end + end + def process_watched(watched_files) - logger.debug("Watched processing") # Handles watched_files in the watched state. # for a slice of them: # move to the active state # and we allow the block to open the file and create a sincedb collection record if needed # some have never been active and some have # those that were active before but are watched now were closed under constraint - + logger.trace("Watched processing") # how much of the max active window is available to_take = @settings.max_active - watched_files.count{|wf| wf.active?} if to_take > 0 watched_files.select {|wf| wf.watched?}.take(to_take).each do |watched_file| - path = watched_file.path - begin - watched_file.restat - watched_file.activate - if watched_file.initial? - create_initial(watched_file) - else - create(watched_file) - end - rescue Errno::ENOENT - # file has gone away or we can't read it anymore. - common_deleted_reaction(watched_file, "Watched") - rescue => e - common_error_reaction(path, e, "Watched") + watched_file.activate + if watched_file.initial? + create_initial(watched_file) + else + create(watched_file) end break if watch.quit? end @@ -159,51 +228,71 @@ def process_watched(watched_files) end def process_active(watched_files) - logger.debug("Active processing") + # logger.trace("Active processing") # Handles watched_files in the active state. - # it has been read once - unless they were empty at the time + # files have been opened at this point watched_files.select {|wf| wf.active? }.each do |watched_file| - path = watched_file.path - begin - watched_file.restat - rescue Errno::ENOENT - # file has gone away or we can't read it anymore. - common_deleted_reaction(watched_file, "Active") - next - rescue => e - common_error_reaction(path, e, "Active") - next - end break if watch.quit? + path = watched_file.filename if watched_file.grown? - logger.debug("Active - file grew: #{path}: new size is #{watched_file.last_stat_size}, old size #{watched_file.bytes_read}") + logger.trace("Active - file grew: #{path}: new size is #{watched_file.last_stat_size}, bytes read #{watched_file.bytes_read}") grow(watched_file) elsif watched_file.shrunk? + if watched_file.bytes_unread > 0 + logger.warn("Active - shrunk: DATA LOSS!! truncate detected with #{watched_file.bytes_unread} unread bytes: #{path}") + end # we don't update the size here, its updated when we actually read - logger.debug("Active - file shrunk #{path}: new size is #{watched_file.last_stat_size}, old size #{watched_file.bytes_read}") + logger.trace("Active - file shrunk #{path}: new size is #{watched_file.last_stat_size}, old size #{watched_file.bytes_read}") shrink(watched_file) else # same size, do nothing + logger.trace("Active - no change", "watched_file" => watched_file.details) end # can any active files be closed to make way for waiting files? if watched_file.file_closable? 
- logger.debug("Watch each: active: file expired: #{path}") + logger.trace("Watch each: active: file expired: #{path}") timeout(watched_file) watched_file.close end end end - def common_deleted_reaction(watched_file, action) - # file has gone away or we can't read it anymore. - watched_file.unwatch - delete(watched_file) - deletable_filepaths << watched_file.path - logger.debug("#{action} - stat failed: #{watched_file.path}, removing from collection") + def common_restat_with_delay(watched_file, action, &block) + common_restat(watched_file, action, true, &block) end - def common_error_reaction(path, error, action) - logger.error("#{action} - other error #{path}: (#{error.message}, #{error.backtrace.take(8).inspect})") + def common_restat_without_delay(watched_file, action, &block) + common_restat(watched_file, action, false, &block) + end + + def common_restat(watched_file, action, delay, &block) + all_ok = true + begin + watched_file.restat + if watched_file.rotation_in_progress? + logger.trace("-------------------- >>>>> restat - rotation_detected", "watched_file details" => watched_file.details, "new sincedb key" => watched_file.stat_sincedb_key) + # don't yield to closed and ignore processing + else + yield if block_given? + end + rescue Errno::ENOENT + if delay + logger.trace("#{action} - delaying the stat fail on: #{watched_file.filename}") + watched_file.delay_delete + else + # file has gone away or we can't read it anymore. + logger.trace("#{action} - after a delay, really can't find this file: #{watched_file.filename}") + watched_file.unwatch + logger.trace("#{action} - removing from collection: #{watched_file.filename}") + delete(watched_file) + deletable_filepaths << watched_file.path + all_ok = false + end + rescue => e + logger.error("#{action} - other error #{watched_file.path}: (#{e.message}, #{e.backtrace.take(8).inspect})") + all_ok = false + end + all_ok end end end end diff --git a/lib/filewatch/watch.rb b/lib/filewatch/watch.rb index 6256ae4..ea5b9f7 100644 --- a/lib/filewatch/watch.rb +++ b/lib/filewatch/watch.rb @@ -10,11 +10,8 @@ class Watch def initialize(discoverer, watched_files_collection, settings) @settings = settings - # watch and iterate_on_state can be called from different threads. - @lock = Mutex.new # we need to be threadsafe about the quit mutation - @quit = false - @quit_lock = Mutex.new + @quit = Concurrent::AtomicBoolean.new(false) @lastwarn_max_files = 0 @discoverer = discoverer @watched_files_collection = watched_files_collection @@ -27,17 +24,13 @@ def add_processor(processor) end def watch(path) - synchronized do - @discoverer.add_path(path) - end + @discoverer.add_path(path) # don't return whatever @discoverer.add_path returns return true end def discover - synchronized do - @discoverer.discover - end + @discoverer.discover # don't return whatever @discoverer.discover returns return true end @@ -60,6 +53,7 @@ def subscribe(observer, sincedb_collection) break if quit? sleep(@settings.stat_interval) end + sincedb_collection.write_if_requested # does nothing if no requests to write were lodged. @watched_files_collection.close_all end # def subscribe @@ -67,42 +61,28 @@ def subscribe(observer, sincedb_collection) # differently from Tail mode - see the ReadMode::Processor and TailMode::Processor def iterate_on_state return if @watched_files_collection.empty? - synchronized do - begin - # creates this snapshot of watched_file values just once - watched_files = @watched_files_collection.values - @processor.process_closed(watched_files) - return if quit? 
- @processor.process_ignored(watched_files) - return if quit? - @processor.process_watched(watched_files) - return if quit? - @processor.process_active(watched_files) - ensure - @watched_files_collection.delete(@processor.deletable_filepaths) - @processor.deletable_filepaths.clear - end + begin + # creates this snapshot of watched_file values just once + watched_files = @watched_files_collection.values + @processor.process_all_states(watched_files) + ensure + @watched_files_collection.delete(@processor.deletable_filepaths) + @processor.deletable_filepaths.clear end end # def each def quit - @quit_lock.synchronize do - @quit = true - end - end # def quit + @quit.make_true + end def quit? - @quit_lock.synchronize { @quit } + @quit.true? end private - def synchronized(&block) - @lock.synchronize { block.call } - end - def reset_quit - @quit_lock.synchronize { @quit = false } + @quit.make_false end end end diff --git a/lib/filewatch/watched_file.rb b/lib/filewatch/watched_file.rb index a80a93c..78873a8 100644 --- a/lib/filewatch/watched_file.rb +++ b/lib/filewatch/watched_file.rb @@ -2,31 +2,162 @@ module FileWatch class WatchedFile - include InodeMixin # see bootstrap.rb at `if LogStash::Environment.windows?` + PATH_BASED_STAT = 0 + IO_BASED_STAT = 1 - attr_reader :bytes_read, :state, :file, :buffer, :recent_states - attr_reader :path, :filestat, :accessed_at, :modified_at, :pathname - attr_reader :sdb_key_v1, :last_stat_size, :listener + attr_reader :bytes_read, :state, :file, :buffer, :recent_states, :bytes_unread + attr_reader :path, :accessed_at, :modified_at, :pathname, :filename + attr_reader :listener, :read_loop_count, :read_chunk_size, :stat, :read_bytesize_description attr_accessor :last_open_warning_at # this class represents a file that has been discovered + # path based stat is taken at discovery def initialize(pathname, stat, settings) @settings = settings @pathname = Pathname.new(pathname) # given arg pathname might be a string or a Pathname object @path = @pathname.to_path - @bytes_read = 0 - @last_stat_size = 0 - # the prepare_inode method is sourced from the mixed module above - @sdb_key_v1 = InodeStruct.new(*prepare_inode(path, stat)) + @filename = @pathname.basename.to_s + full_state_reset(stat) + watch + set_user_defined_read_loop + set_accessed_at + end + + def no_restat_reset + full_state_reset(@stat) + end + + def full_state_reset(this_stat = nil) + if this_stat.nil? + begin + this_stat = PathStatClass.new(pathname) + rescue Errno::ENOENT + delay_delete + return + end + end + @bytes_read = 0 # tracks bytes read from the open file or initialized from a matched sincedb_value off disk. + @bytes_unread = 0 # tracks bytes not yet read from the open file. So we can warn on shrink when unread bytes are seen. + file_close + set_stat(this_stat) + @listener = nil + @last_open_warning_at = nil # initial as true means we have not associated this watched_file with a previous sincedb value yet. # and we should read from the beginning if necessary @initial = true @recent_states = [] # keep last 8 states, managed in set_state - @state = :watched - set_stat(stat) # can change @last_stat_size + # the prepare_inode method is sourced from the mixed module above + watch if active? || @state.nil? 
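# NOTE: the Mutex-guarded @quit flag above is replaced with
# Concurrent::AtomicBoolean (concurrent-ruby, which this diff already requires
# in spec_helper), making quit and quit? single lock-free operations on the
# hot subscribe loop. The same signalling pattern in isolation:
require 'concurrent'

quit = Concurrent::AtomicBoolean.new(false)
worker = Thread.new do
  sleep 0.01 until quit.true? # the subscribe loop polls quit? like this
end
quit.make_true                # any thread can signal shutdown; no lock needed
worker.join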
+ end + + def rotate_from(other) + # move all state from other to this one + set_user_defined_read_loop + file_close + @bytes_read = other.bytes_read + @bytes_unread = other.bytes_unread @listener = nil + @initial = false + @recent_states = other.recent_states + @accessed_at = other.accessed_at + if !other.delayed_delete? + # we don't know if a file exists at the other.path yet + # so no reset + other.full_state_reset + end + set_stat PathStatClass.new(pathname) + ignore + end + + def set_stat(stat) + @stat = stat + @size = @stat.size + @sdb_key_v1 = @stat.inode_struct + end + + def rotate_as_initial_file + # rotation, when no sincedb record exists for new inode - we have never seen this inode before. + rotate_as_file + @initial = true + end + + def rotate_as_file(bytes_read = 0) + # rotation, when a sincedb record exists for new inode, but no watched file to rotate from + # probably caused by a deletion detected in the middle of the rename cascade + # RARE due to delayed_delete - there would have to be a large time span between the renames. + @bytes_read = bytes_read # tracks bytes read from the open file or initialized from a matched sincedb_value off disk. + @bytes_unread = 0 # tracks bytes not yet read from the open file. So we can warn on shrink when unread bytes are seen. @last_open_warning_at = nil - set_accessed_at + # initial as true means we have not associated this watched_file with a previous sincedb value yet. + # and we should read from the beginning if necessary + @initial = false + @recent_states = [] # keep last 8 states, managed in set_state + set_stat(PathStatClass.new(pathname)) + reopen + watch + end + + def stat_sincedb_key + @stat.inode_struct + end + + def rotation_detected? + stat_sincedb_key != sincedb_key + end + + def restat + @stat.restat + if rotation_detected? + # switch to new state now + rotation_in_progress + else + @size = @stat.size + update_bytes_unread + end + end + + def modified_at + @stat.modified_at + end + + def position_for_new_sincedb_value + if @initial + # this file was found in first discovery + @settings.start_new_files_at == :beginning ? 0 : last_stat_size + else + # always start at the beginning if found after first discovery + 0 + end + end + + def last_stat_size + @stat.size + end + + def current_size + @size + end + + def shrunk? + @size < @bytes_read + end + + def grown? + @size > @bytes_read + end + + def size_changed? + # called from closed and ignored + # before the last stat was taken file should be fully read. + @size != @bytes_read + end + + def all_read? + @bytes_read >= @size + end + + def file_at_path_found_again + restore_previous_state end def set_listener(observer) @@ -61,12 +192,11 @@ def compressed? @path.end_with?('.gz','.gzip') end - def size_changed? - @last_stat_size != bytes_read - end - - def all_read? - @last_stat_size == bytes_read + def reopen + if file_open? + file_close + open + end end def open @@ -88,9 +218,9 @@ def file_seek(amount, whence = IO::SEEK_SET) @file.sysseek(amount, whence) end - def file_read(amount) + def file_read(amount = nil) set_accessed_at - @file.sysread(amount) + @file.sysread(amount || @read_chunk_size) end def file_open? 
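# NOTE: rotation_detected? compares stat_sincedb_key (a fresh stat of the
# path) against sincedb_key (the stat captured when the file was associated);
# a changed inode or device behind an unchanged path means the path was
# rotated. A standalone sketch with plain File::Stat, assuming the
# inode/major/minor key shape this diff uses in InodeStruct:
require 'tmpdir'

def inode_key(path)
  s = File.stat(path)
  [s.ino.to_s, s.dev_major, s.dev_minor]
end

Dir.mktmpdir do |dir|
  log = File.join(dir, "app.log")
  File.write(log, "a\n")
  key_at_discovery = inode_key(log)
  File.rename(log, "#{log}.1")            # rotate the file away
  File.write(log, "b\n")                  # same path, new inode
  puts inode_key(log) != key_at_discovery # => true on most filesystems
end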
@@ -101,6 +231,13 @@ def reset_buffer @buffer.flush end + def read_extract_lines + data = file_read + result = buffer_extract(data) + increment_bytes_read(data.bytesize) + result + end + def buffer_extract(data) warning, additional = "", {} lines = @buffer.extract(data) @@ -112,7 +249,7 @@ def buffer_extract(data) additional["delimiter"] = @settings.delimiter additional["read_position"] = @bytes_read additional["bytes_read_count"] = data.bytesize - additional["last_known_file_size"] = @last_stat_size + additional["last_known_file_size"] = last_stat_size additional["file_path"] = @path end BufferExtractResult.new(lines, warning, additional) @@ -121,19 +258,19 @@ def buffer_extract(data) def increment_bytes_read(delta) return if delta.nil? @bytes_read += delta + update_bytes_unread + @bytes_read end def update_bytes_read(total_bytes_read) return if total_bytes_read.nil? @bytes_read = total_bytes_read + update_bytes_unread + @bytes_read end - def update_path(_path) - @path = _path - end - - def update_stat(st) - set_stat(st) + def rotation_in_progress + set_state :rotation_in_progress end def activate @@ -142,7 +279,11 @@ def activate def ignore set_state :ignored - @bytes_read = @filestat.size + end + + def ignore_as_unread + ignore + @bytes_read = @size end def close @@ -157,10 +298,26 @@ def unwatch set_state :unwatched end + def delay_delete + set_state :delayed_delete + end + + def restore_previous_state + set_state @recent_states.pop + end + + def rotation_in_progress? + @state == :rotation_in_progress + end + def active? @state == :active end + def delayed_delete? + @state == :delayed_delete + end + def ignored? @state == :ignored end @@ -185,21 +342,26 @@ def expiry_ignore_enabled? !@settings.ignore_older.nil? end - def shrunk? - @last_stat_size < @bytes_read + def set_depth_first_read_loop + @read_loop_count = FileWatch::MAX_ITERATIONS + @read_chunk_size = FileWatch::FILE_READ_SIZE + @read_bytesize_description = "All" end - def grown? - @last_stat_size > @bytes_read + def set_user_defined_read_loop + @read_loop_count = @settings.file_chunk_count + @read_chunk_size = @settings.file_chunk_size + @read_bytesize_description = @read_loop_count == FileWatch::MAX_ITERATIONS ? "All" : (@read_loop_count * @read_chunk_size).to_s end - def restat - set_stat(pathname.stat) + def reset_bytes_unread + # called from shrink + @bytes_unread = 0 end def set_state(value) @recent_states.shift if @recent_states.size == 8 - @recent_states << @state + @recent_states << @state unless @state.nil? @state = value end @@ -216,7 +378,7 @@ def file_ignorable? # (Time.now - stat.mtime) <- in jruby, this does int and float # conversions before the subtraction and returns a float. # so use all floats upfront - (Time.now.to_f - @modified_at) > @settings.ignore_older + (Time.now.to_f - modified_at) > @settings.ignore_older end def file_can_close? @@ -224,16 +386,26 @@ def file_can_close? 
(Time.now.to_f - @accessed_at) > @settings.close_older end + def details + detail = "@filename='#{filename}', @state='#{state}', @recent_states='#{@recent_states.inspect}', " + detail.concat("@bytes_read='#{@bytes_read}', @bytes_unread='#{@bytes_unread}', current_size='#{current_size}', ") + detail.concat("last_stat_size='#{last_stat_size}', file_open?='#{file_open?}', @initial=#{@initial}") + "<FileWatch::WatchedFile: #{detail}, @sincedb_key='#{sincedb_key}'>" + end + + def inspect + "\"<FileWatch::WatchedFile: @filename='#{@filename}', @state='#{@state}', @sincedb_key='#{sincedb_key}', size=#{@size}>\"" + end end end diff --git a/lib/filewatch/winhelper.rb b/lib/filewatch/winhelper.rb + def self.file_system_type_from_path(path) + file_system_type_from_handle(open_handle_from_path(path)) + end + + def self.file_system_type_from_handle(handle, close_handle = true) + out = FFI::MemoryPointer.new(:char, 256) + if GetVolumeInformationByHandleW(handle, nil, 0, nil, nil, nil, out, 256) > 0 + char_pointer_to_ruby_string(out) + else + "unknown" + end + ensure + CloseHandle(handle) if close_handle + end + + def self.identifier_from_io(io) + FileWatch::FileExt.io_handle(io) do |pointer| + identifier_from_handle(pointer, false) + end + end + + def self.identifier_from_path(path) + identifier_from_handle(open_handle_from_path(path)) + end + + def self.identifier_from_path_ex(path) + identifier_from_handle_ex(open_handle_from_path(path)) + end + + def self.identifier_from_io_ex(io) + FileWatch::FileExt.io_handle(io) do |pointer| + identifier_from_handle_ex(pointer, false) + end + end + + def self.identifier_from_handle_ex(handle, close_handle = true) + fileIdInfo = Winhelper::FileIdInfo.new + success = GetFileInformationByHandleEx(handle, :FileIdInfo, fileIdInfo, fileIdInfo.size) + if success > 0 + vsn = fileIdInfo[:volumeSerialNumber] + lpfid = fileIdInfo[:fileId][:lowPart] + hpfid = fileIdInfo[:fileId][:highPart] + return "#{vsn}-#{lpfid}-#{hpfid}" + else + return 'unknown' + end + ensure + CloseHandle(handle) if close_handle + end + + def self.identifier_from_handle(handle, close_handle = true) fileInfo = Winhelper::FileInformation.new success = GetFileInformationByHandle(handle, fileInfo) - CloseHandle(handle) - if success == 1 + if success > 0 #args = [ - # fileInfo[:fileAttributes], fileInfo[:volumeSerialNumber], fileInfo[:fileSizeHigh], fileInfo[:fileSizeLow], - # fileInfo[:numberOfLinks], fileInfo[:fileIndexHigh], fileInfo[:fileIndexLow] - # ] + # fileInfo[:fileAttributes], fileInfo[:volumeSerialNumber], fileInfo[:fileSizeHigh], fileInfo[:fileSizeLow], + # fileInfo[:numberOfLinks], fileInfo[:fileIndexHigh], fileInfo[:fileIndexLow] + # ] #p "Information: %u %u %u %u %u %u %u " % args #this is only guaranteed on NTFS, for ReFS on windows 2012, GetFileInformationByHandleEx should be used with FILE_ID_INFO, which returns a 128 bit identifier return "#{fileInfo[:volumeSerialNumber]}-#{fileInfo[:fileIndexLow]}-#{fileInfo[:fileIndexHigh]}" else - #p "cannot retrieve file information, returning path" - return path; + return 'unknown' end + ensure + CloseHandle(handle) if close_handle + end + + private + + def self.open_handle_from_path(path) + CreateFileW(in_buffer(path), 0, 7, nil, 3, 128, nil) + end + + def self.in_buffer(string) + utf16le(string) + end + + def self.char_pointer_to_ruby_string(char_pointer, length = 256) + bytes = char_pointer.get_array_of_uchar(0, length) + ignore = bytes.reverse.index{|b| b != 0} - 1 + our_bytes = bytes[0, bytes.length - ignore] + our_bytes.pack("C*").force_encoding("UTF-16LE").encode("UTF-8") + end + + def self.utf16le(string) + string.encode("UTF-16LE") + end + + def self.win1252(string) + string.encode("Windows-1252") + end end + #fileId = Winhelper.GetWindowsUniqueFileIdentifier('C:\inetpub\logs\LogFiles\W3SVC1\u_ex1fdsadfsadfasdf30612.log') #p "FileId: " + fileId #p "outside function, sleeping" diff --git a/lib/logstash/inputs/file.rb b/lib/logstash/inputs/file.rb index 81b04b4..511e4dd 100644 --- a/lib/logstash/inputs/file.rb +++ b/lib/logstash/inputs/file.rb @@ -206,7 +206,7 @@ class File < LogStash::Inputs::Base # 
1MB from each active file. See the option `max_open_files` for more info. # The default set internally is very large, 4611686018427387903. By default # the file is read to the end before moving to the next active file. - config :file_chunk_count, :validate => :number, :default => FileWatch::FIXNUM_MAX + config :file_chunk_count, :validate => :number, :default => FileWatch::MAX_ITERATIONS # Which attribute of a discovered file should be used to sort the discovered files. # Files can be sort by modified date or full path alphabetic. @@ -312,8 +312,14 @@ def register end end @codec = LogStash::Codecs::IdentityMapCodec.new(@codec) + @completely_stopped = Concurrent::AtomicBoolean.new end # def register + def completely_stopped? + # to synchronise after(:each) blocks in tests that remove the sincedb file before atomic_write completes + @completely_stopped.true? + end + def listener_for(path) # path is the identity FileListener.new(path, self) @@ -333,6 +339,7 @@ def run(queue) @watcher.subscribe(self) # halts here until quit is called # last action of the subscribe call is to write the sincedb exit_flush + @completely_stopped.make_true end # def run def post_process_this(event) @@ -354,7 +361,7 @@ def log_line_received(path, line) end def stop - if @watcher + unless @watcher.nil? @codec.close @watcher.quit end diff --git a/logstash-input-file.gemspec b/logstash-input-file.gemspec index 38e89de..a340f84 100644 --- a/logstash-input-file.gemspec +++ b/logstash-input-file.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-file' - s.version = '4.1.3' + s.version = '4.1.4' s.licenses = ['Apache-2.0'] s.summary = "Streams events from files" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -23,7 +23,14 @@ Gem::Specification.new do |s| s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" s.add_runtime_dependency 'logstash-codec-plain' - s.add_runtime_dependency 'addressable' + + if RUBY_VERSION.start_with?("1") + s.add_runtime_dependency 'rake', '~> 12.2.0' + s.add_runtime_dependency 'addressable', '~> 2.4.0' + else + s.add_runtime_dependency 'addressable' + end + s.add_runtime_dependency 'logstash-codec-multiline', ['~> 3.0'] s.add_development_dependency 'stud', ['~> 0.0.19'] diff --git a/run_until_fail.sh b/run_until_fail.sh new file mode 100755 index 0000000..3814284 --- /dev/null +++ b/run_until_fail.sh @@ -0,0 +1,4 @@ +while true +do + LOG_AT=ERROR bundle exec rspec -fd --fail-fast --tag ~lsof ./spec || break +done diff --git a/spec/file_ext/file_ext_windows_spec.rb b/spec/file_ext/file_ext_windows_spec.rb new file mode 100644 index 0000000..df54d98 --- /dev/null +++ b/spec/file_ext/file_ext_windows_spec.rb @@ -0,0 +1,36 @@ +# encoding: utf-8 + +require_relative '../filewatch/spec_helper' + +if LogStash::Environment.windows? 
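# NOTE: the Winhelper functions exercised below supply the sincedb file
# identity on Windows, where there is no usable inode: identifier_from_handle
# packs the NTFS volume serial and 64-bit file index, and the *_ex variants
# use the 128-bit FILE_ID_INFO needed on ReFS. Both produce the same
# three-part shape; illustration with made-up numbers:
volume_serial   = 1392281259
file_index_low  = 55783
file_index_high = 0
key = "#{volume_serial}-#{file_index_low}-#{file_index_high}"
puts key # => "1392281259-55783-0", the Windows stand-in for "inode-major-minor"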
+ describe "basic ops" do + let(:fixture_dir) { Pathname.new(FileWatch::FIXTURE_DIR).expand_path } + let(:file_path) { fixture_dir.join('uncompressed.log') } + it "path works" do + path = file_path.to_path + identifier = Winhelper.identifier_from_path(path) + STDOUT.puts("--- >>", identifier, "------") + expect(identifier.count('-')).to eq(2) + fs_name = Winhelper.file_system_type_from_path(path) + STDOUT.puts("--- >>", fs_name, "------") + expect(fs_name).to eq("NTFS") + # identifier = Winhelper.identifier_from_path_ex(path) + # STDOUT.puts("--- >>", identifier, "------") + # expect(identifier.count('-')).to eq(2) + end + + it "io works" do + file = FileWatch::FileOpener.open(file_path.to_path) + identifier = Winhelper.identifier_from_io(file) + file.close + STDOUT.puts("--- >>", identifier, "------") + expect(identifier.count('-')).to eq(2) + # fs_name = Winhelper.file_system_type_from_io(file) + # STDOUT.puts("--- >>", fs_name, "------") + # expect(fs_name).to eq("NTFS") + # identifier = Winhelper.identifier_from_path_ex(path) + # STDOUT.puts("--- >>", identifier, "------") + # expect(identifier.count('-')).to eq(2) + end + end +end diff --git a/spec/filewatch/read_mode_handlers_read_file_spec.rb b/spec/filewatch/read_mode_handlers_read_file_spec.rb index 4667b36..ab36e71 100644 --- a/spec/filewatch/read_mode_handlers_read_file_spec.rb +++ b/spec/filewatch/read_mode_handlers_read_file_spec.rb @@ -12,7 +12,7 @@ module FileWatch let(:sdb_collection) { SincedbCollection.new(settings) } let(:directory) { Pathname.new(FIXTURE_DIR) } let(:pathname) { directory.join('uncompressed.log') } - let(:watched_file) { WatchedFile.new(pathname, pathname.stat, settings) } + let(:watched_file) { WatchedFile.new(pathname, PathStatClass.new(pathname), settings) } let(:processor) { ReadMode::Processor.new(settings).add_watch(watch) } let(:file) { DummyFileReader.new(settings.file_chunk_size, 2) } diff --git a/spec/filewatch/reading_spec.rb b/spec/filewatch/reading_spec.rb index 4f1f845..53deee1 100644 --- a/spec/filewatch/reading_spec.rb +++ b/spec/filewatch/reading_spec.rb @@ -30,82 +30,115 @@ module FileWatch end let(:observer) { TestObserver.new } let(:reading) { ObservingRead.new(opts) } - let(:actions) do - RSpec::Sequencing.run_after(0.45, "quit after a short time") do - reading.quit - end - end + let(:listener1) { observer.listener_for(file_path) } after do FileUtils.rm_rf(directory) unless directory =~ /fixture/ end context "when watching a directory with files" do - let(:directory) { Stud::Temporary.directory } - let(:watch_dir) { ::File.join(directory, "*.log") } - let(:file_path) { ::File.join(directory, "1.log") } - + let(:actions) do + RSpec::Sequencing.run("quit after a short time") do + File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } + end + .then("watch") do + reading.watch_this(watch_dir) + end + .then("wait") do + wait(2).for{listener1.calls.last}.to eq(:delete) + end + .then("quit") do + reading.quit + end + end it "the file is read" do - File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } - actions.activate - reading.watch_this(watch_dir) + actions.activate_quietly reading.subscribe(observer) - expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :eof, :delete]) - expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"]) + actions.assert_no_errors + expect(listener1.calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(listener1.lines).to eq(["line1", "line2"]) end end context "when watching a directory 
with files and sincedb_path is /dev/null or NUL" do - let(:directory) { Stud::Temporary.directory } let(:sincedb_path) { File::NULL } - let(:watch_dir) { ::File.join(directory, "*.log") } - let(:file_path) { ::File.join(directory, "1.log") } - + let(:actions) do + RSpec::Sequencing.run("quit after a short time") do + File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } + end + .then("watch") do + reading.watch_this(watch_dir) + end + .then("wait") do + wait(2).for{listener1.calls.last}.to eq(:delete) + end + .then("quit") do + reading.quit + end + end it "the file is read" do - File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } - actions.activate - reading.watch_this(watch_dir) + actions.activate_quietly reading.subscribe(observer) - expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :eof, :delete]) - expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"]) + actions.assert_no_errors + expect(listener1.calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(listener1.lines).to eq(["line1", "line2"]) end end context "when watching a directory with files using striped reading" do - let(:directory) { Stud::Temporary.directory } - let(:watch_dir) { ::File.join(directory, "*.log") } - let(:file_path1) { ::File.join(directory, "1.log") } let(:file_path2) { ::File.join(directory, "2.log") } # use a chunk size that does not align with the line boundaries - let(:opts) { super.merge(:file_chunk_size => 10, :file_chunk_count => 1)} + let(:opts) { super.merge(:file_chunk_size => 10, :file_chunk_count => 1, :file_sort_by => "path")} let(:lines) { [] } let(:observer) { TestObserver.new(lines) } - + let(:listener2) { observer.listener_for(file_path2) } + let(:actions) do + RSpec::Sequencing.run("create file") do + File.open(file_path, "w") { |file| file.write("string1\nstring2") } + File.open(file_path2, "w") { |file| file.write("stringA\nstringB") } + end + .then("watch") do + reading.watch_this(watch_dir) + end + .then("wait") do + wait(2).for{listener1.calls.last == :delete && listener2.calls.last == :delete}.to eq(true) + end + .then("quit") do + reading.quit + end + end it "the files are read seemingly in parallel" do - File.open(file_path1, "w") { |file| file.write("string1\nstring2\n") } - File.open(file_path2, "w") { |file| file.write("stringA\nstringB\n") } - actions.activate - reading.watch_this(watch_dir) + actions.activate_quietly reading.subscribe(observer) - if lines.first == "stringA" - expect(lines).to eq(%w(stringA string1 stringB string2)) - else - expect(lines).to eq(%w(string1 stringA string2 stringB)) - end + actions.assert_no_errors + expect(listener1.calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(listener2.calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(lines).to eq(%w(string1 stringA string2 stringB)) end end context "when a non default delimiter is specified and it is not in the content" do let(:opts) { super.merge(:delimiter => "\nø") } - + let(:actions) do + RSpec::Sequencing.run("create file") do + File.open(file_path, "wb") { |file| file.write("line1\nline2") } + end + .then("watch") do + reading.watch_this(watch_dir) + end + .then("wait") do + wait(2).for{listener1.calls.last}.to eq(:delete) + end + .then("quit") do + reading.quit + end + end it "the file is opened, data is read, but no lines are found initially, at EOF the whole file becomes the line" do - File.open(file_path, "wb") { |file| file.write("line1\nline2") } - actions.activate - 
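# NOTE: "striped" reading. With :file_chunk_size => 10 and
# :file_chunk_count => 1 the processor takes one 10-byte chunk per file per
# pass, so two 15-byte files drain in interleaved order; adding
# :file_sort_by => "path" makes that order deterministic enough to assert on.
# A toy model of the schedule:
files = { "1.log" => "string1\nstring2", "2.log" => "stringA\nstringB" }
offsets = Hash.new(0)
chunk_size = 10
until files.all? { |name, data| offsets[name] >= data.bytesize }
  files.each do |name, data|
    chunk = data.byteslice(offsets[name], chunk_size)
    next if chunk.nil? || chunk.empty?
    offsets[name] += chunk.bytesize
    print "#{name}=#{chunk.inspect} "
  end
end
puts # => 1.log="string1\nst" 2.log="stringA\nst" 1.log="ring2" 2.log="ringB"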
reading.watch_this(watch_dir) + actions.activate_quietly reading.subscribe(observer) - listener = observer.listener_for(file_path) - expect(listener.calls).to eq([:open, :accept, :eof, :delete]) - expect(listener.lines).to eq(["line1\nline2"]) + actions.assert_no_errors + expect(listener1.calls).to eq([:open, :accept, :eof, :delete]) + expect(listener1.lines).to eq(["line1\nline2"]) sincedb_record_fields = File.read(sincedb_path).split(" ") position_field_index = 3 # tailing, no delimiter, we are expecting one, if it grows we read from the start. @@ -116,18 +149,28 @@ module FileWatch describe "reading fixtures" do let(:directory) { FIXTURE_DIR } - + let(:actions) do + RSpec::Sequencing.run("watch") do + reading.watch_this(watch_dir) + end + .then("wait") do + wait(1).for{listener1.calls.last}.to eq(:delete) + end + .then("quit") do + reading.quit + end + end context "for an uncompressed file" do let(:watch_dir) { ::File.join(directory, "unc*.log") } let(:file_path) { ::File.join(directory, 'uncompressed.log') } it "the file is read" do FileWatch.make_fixture_current(file_path) - actions.activate - reading.watch_this(watch_dir) + actions.activate_quietly reading.subscribe(observer) - expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :eof, :delete]) - expect(observer.listener_for(file_path).lines.size).to eq(2) + actions.assert_no_errors + expect(listener1.calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(listener1.lines.size).to eq(2) end end @@ -137,11 +180,11 @@ module FileWatch it "the file is read" do FileWatch.make_fixture_current(file_path) - actions.activate - reading.watch_this(watch_dir) + actions.activate_quietly reading.subscribe(observer) - expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :eof, :delete]) - expect(observer.listener_for(file_path).lines.size).to eq(2) + actions.assert_no_errors + expect(listener1.calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(listener1.lines.size).to eq(2) end end @@ -151,11 +194,11 @@ module FileWatch it "the file is read" do FileWatch.make_fixture_current(file_path) - actions.activate - reading.watch_this(watch_dir) + actions.activate_quietly reading.subscribe(observer) - expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :eof, :delete]) - expect(observer.listener_for(file_path).lines.size).to eq(2) + actions.assert_no_errors + expect(listener1.calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(listener1.lines.size).to eq(2) end end end diff --git a/spec/filewatch/rotate_spec.rb b/spec/filewatch/rotate_spec.rb new file mode 100644 index 0000000..e1d2b4e --- /dev/null +++ b/spec/filewatch/rotate_spec.rb @@ -0,0 +1,451 @@ +# encoding: utf-8 +require 'stud/temporary' +require_relative 'spec_helper' +require 'filewatch/observing_tail' + +# simulate size based rotation ala +# See https://docs.python.org/2/library/logging.handlers.html#rotatingfilehandler +# The specified file is opened and used as the stream for logging. +# If mode is not specified, 'a' is used. If encoding is not None, it is used to +# open the file with that encoding. If delay is true, then file opening is deferred +# until the first call to emit(). By default, the file grows indefinitely. +# You can use the maxBytes and backupCount values to allow the file to rollover +# at a predetermined size. When the size is about to be exceeded, the file is +# closed and a new file is silently opened for output. 
Rollover occurs whenever +# the current log file is nearly maxBytes in length; if either of maxBytes or +# backupCount is zero, rollover never occurs. If backupCount is non-zero, the +# system will save old log files by appending the extensions ‘.1’, ‘.2’ etc., +# to the filename. For example, with a backupCount of 5 and a base file name of +# app.log, you would get app.log, app.log.1, app.log.2, up to app.log.5. +# The file being written to is always app.log. When this file is filled, it is +# closed and renamed to app.log.1, and if files app.log.1, app.log.2, etc. +# exist, then they are renamed to app.log.2, app.log.3 etc. respectively. + +module FileWatch + describe Watch, :unix => true do + let(:directory) { Pathname.new(Stud::Temporary.directory) } + let(:file1_path) { file_path.to_path } + let(:max) { 4095 } + let(:stat_interval) { 0.01 } + let(:discover_interval) { 15 } + let(:start_new_files_at) { :beginning } + let(:sincedb_path) { directory.join("tailing.sdb") } + let(:opts) do + { + :stat_interval => stat_interval, :start_new_files_at => start_new_files_at, :max_active => max, + :delimiter => "\n", :discover_interval => discover_interval, :sincedb_path => sincedb_path.to_path + } + end + let(:observer) { TestObserver.new } + let(:tailing) { ObservingTail.new(opts) } + let(:line1) { "Line 1 - Lorem ipsum dolor sit amet, consectetur adipiscing elit." } + let(:line2) { "Line 2 - Proin ut orci lobortis, congue diam in, dictum est." } + let(:line3) { "Line 3 - Sed vestibulum accumsan sollicitudin." } + + before do + directory + wait(1.0).for{Dir.exist?(directory)}.to eq(true) + end + + after do + FileUtils.rm_rf(directory) + wait(1.0).for{Dir.exist?(directory)}.to eq(false) + end + + context "create + rename rotation: when a new logfile is renamed to a path we have seen before and the open file is fully read, renamed outside glob" do + let(:watch_dir) { directory.join("*A.log") } + let(:file_path) { directory.join("1A.log") } + subject { described_class.new(conf) } + let(:listener1) { observer.listener_for(file1_path) } + let(:listener2) { observer.listener_for(second_file.to_path) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, "create file") do + file_path.open("wb") { |file| file.write("#{line1}\n") } + end + .then_after(0.25, "write a 'unfinished' line") do + file_path.open("ab") { |file| file.write(line2) } + end + .then_after(0.25, "rotate once") do + tmpfile = directory.join("1.logtmp") + tmpfile.open("wb") { |file| file.write("\n#{line3}\n")} + file_path.rename(directory.join("1.log.1")) + FileUtils.mv(directory.join("1.logtmp").to_path, file1_path) + end + .then("wait for expectation") do + wait(2).for{listener1.calls}.to eq([:open, :accept, :accept, :accept]) + end + .then("quit") do + tailing.quit + end + end + + it "content from both inodes are sent via the same stream" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + lines = listener1.lines + expect(lines[0]).to eq(line1) + expect(lines[1]).to eq(line2) + expect(lines[2]).to eq(line3) + end + end + + context "create + rename rotation: a multiple file rename cascade" do + let(:watch_dir) { directory.join("*B.log") } + let(:file_path) { directory.join("1B.log") } + subject { described_class.new(conf) } + let(:second_file) { directory.join("2B.log") } + let(:third_file) { directory.join("3B.log") } + let(:listener1) { observer.listener_for(file1_path) } + let(:listener2) { observer.listener_for(second_file.to_path) } + 
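# NOTE: the "create + rename" rotation these contexts drive: new content is
# staged in a temp file, the live log is renamed out of the way, and the temp
# file is renamed onto the original path, so the path keeps its name but swaps
# inodes. Standalone sketch of that cascade:
require 'fileutils'
require 'tmpdir'

Dir.mktmpdir do |dir|
  log = File.join(dir, "app.log")
  File.write(log, "old line\n")
  File.write("#{log}tmp", "new line\n") # 1. stage the replacement content
  File.rename(log, "#{log}.1")          # 2. archive the current file
  FileUtils.mv("#{log}tmp", log)        # 3. swap the staged file in
  puts File.read(log)                   # => "new line\n"
end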
let(:listener3) { observer.listener_for(third_file.to_path) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, "create file") do + file_path.open("wb") { |file| file.write("#{line1}\n") } + end + .then_after(0.25, "rotate 1 - line1(66) is in 2B.log, line2(61) is in 1B.log") do + file_path.rename(second_file) + file_path.open("wb") { |file| file.write("#{line2}\n") } + end + .then_after(0.25, "rotate 2 - line1(66) is in 3B.log, line2(61) is in 2B.log, line3(47) is in 1B.log") do + second_file.rename(third_file) + file_path.rename(second_file) + file_path.open("wb") { |file| file.write("#{line3}\n") } + end + .then("wait for expectations to be met") do + wait(0.75).for{listener1.lines.size == 3 && listener3.lines.empty? && listener2.lines.empty?}.to eq(true) + end + .then("quit") do + tailing.quit + end + end + + it "content from both inodes are sent via the same stream" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + expect(listener1.lines[0]).to eq(line1) + expect(listener1.lines[1]).to eq(line2) + expect(listener1.lines[2]).to eq(line3) + end + end + + context "create + rename rotation: a two file rename cascade in slow motion" do + let(:watch_dir) { directory.join("*C.log") } + let(:file_path) { directory.join("1C.log") } + let(:stat_interval) { 0.01 } + subject { described_class.new(conf) } + let(:second_file) { directory.join("2C.log") } + let(:listener1) { observer.listener_for(file1_path) } + let(:listener2) { observer.listener_for(second_file.to_path) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, "create original - write line 1, 66 bytes") do + file_path.open("wb") { |file| file.write("#{line1}\n") } + end + .then_after(0.25, "rename to 2.log") do + file_path.rename(second_file) + end + .then_after(0.25, "write line 2 to original, 61 bytes") do + file_path.open("wb") { |file| file.write("#{line2}\n") } + end + .then_after(0.25, "rename to 2.log again") do + file_path.rename(second_file) + end + .then_after(0.25, "write line 3 to original, 47 bytes") do + file_path.open("wb") { |file| file.write("#{line3}\n") } + end + .then("wait for expectations to be met") do + wait(1).for{listener1.lines.size == 3 && listener2.lines.empty?}.to eq(true) + end + .then("quit") do + tailing.quit + end + end + + it "content from both inodes are sent via the same stream AND content from the rotated file is not read again" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + expect(listener1.lines[0]).to eq(line1) + expect(listener1.lines[1]).to eq(line2) + expect(listener1.lines[2]).to eq(line3) + end + end + + context "create + rename rotation: a two file rename cascade in normal speed" do + let(:watch_dir) { directory.join("*D.log") } + let(:file_path) { directory.join("1D.log") } + subject { described_class.new(conf) } + let(:second_file) { directory.join("2D.log") } + let(:listener1) { observer.listener_for(file1_path) } + let(:listener2) { observer.listener_for(second_file.to_path) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, "create original - write line 1, 66 bytes") do + file_path.open("wb") { |file| file.write("#{line1}\n") } + end + .then_after(0.25, "rename to 2.log") do + file_path.rename(second_file) + file_path.open("wb") { |file| file.write("#{line2}\n") } + end + .then_after(0.25, "rename to 2.log again") do + file_path.rename(second_file) + file_path.open("wb") { |file| file.write("#{line3}\n") 
} + end + .then("wait for expectations to be met") do + wait(0.5).for{listener1.lines.size == 3 && listener2.lines.empty?}.to eq(true) + end + .then("quit") do + tailing.quit + end + end + + it "content from both inodes are sent via the same stream AND content from the rotated file is not read again" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + expect(listener1.lines[0]).to eq(line1) + expect(listener1.lines[1]).to eq(line2) + expect(listener1.lines[2]).to eq(line3) + end + end + + context "create + rename rotation: when a new logfile is renamed to a path we have seen before but not all content from the previous the file is read" do + let(:opts) { super.merge( + :file_chunk_size => line1.bytesize.succ, + :file_chunk_count => 1 + ) } + let(:watch_dir) { directory.join("*E.log") } + let(:file_path) { directory.join("1E.log") } + subject { described_class.new(conf) } + let(:listener1) { observer.listener_for(file1_path) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, "create file") do + file_path.open("wb") do |file| + 65.times{file.puts(line1)} + end + end + .then_after(0.25, "rotate") do + tmpfile = directory.join("1E.logtmp") + tmpfile.open("wb") { |file| file.puts(line1)} + file_path.rename(directory.join("1E.log.1")) + tmpfile.rename(directory.join("1E.log")) + end + .then("wait for expectations to be met") do + wait(0.5).for{listener1.lines.size}.to eq(66) + end + .then("quit") do + tailing.quit + end + end + + it "content from both inodes are sent via the same stream" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + expected_calls = ([:accept] * 66).unshift(:open) + expect(listener1.lines.uniq).to eq([line1]) + expect(listener1.calls).to eq(expected_calls) + expect(sincedb_path.readlines.size).to eq(2) + end + end + + context "copy + truncate rotation: when a logfile is copied to a new path and truncated and the open file is fully read" do + let(:watch_dir) { directory.join("*F.log") } + let(:file_path) { directory.join("1F.log") } + subject { described_class.new(conf) } + let(:listener1) { observer.listener_for(file1_path) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, "create file") do + file_path.open("wb") { |file| file.puts(line1); file.puts(line2) } + end + .then_after(0.25, "rotate") do + FileUtils.cp(file1_path, directory.join("1F.log.1").to_path) + file_path.truncate(0) + end + .then_after(0.25, "write to truncated file") do + file_path.open("wb") { |file| file.puts(line3) } + end + .then("wait for expectations to be met") do + wait(0.5).for{listener1.lines.size}.to eq(3) + end + .then("quit") do + tailing.quit + end + end + + it "content is read correctly" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + expect(listener1.lines).to eq([line1, line2, line3]) + expect(listener1.calls).to eq([:open, :accept, :accept, :accept]) + end + end + + context "copy + truncate rotation: when a logfile is copied to a new path and truncated before the open file is fully read" do + let(:opts) { super.merge( + :file_chunk_size => line1.bytesize.succ, + :file_chunk_count => 1 + ) } + let(:watch_dir) { directory.join("*G.log") } + let(:file_path) { directory.join("1G.log") } + subject { described_class.new(conf) } + let(:listener1) { observer.listener_for(file1_path) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, 
"create file") do + file_path.open("wb") { |file| 65.times{file.puts(line1)} } + end + .then_after(0.25, "rotate") do + FileUtils.cp(file1_path, directory.join("1G.log.1").to_path) + file_path.truncate(0) + end + .then_after(0.25, "write to truncated file") do + file_path.open("wb") { |file| file.puts(line3) } + end + .then("wait for expectations to be met") do + wait(0.5).for{listener1.lines.last}.to eq(line3) + end + .then("quit") do + tailing.quit + end + end + + it "unread content before the truncate is lost" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + expect(listener1.lines.size).to be < 66 + end + end + + context "? rotation: when an active file is renamed inside the glob and the reading does not lag" do + let(:watch_dir) { directory.join("*H.log") } + let(:file_path) { directory.join("1H.log") } + let(:file2) { directory.join("2H.log") } + subject { described_class.new(conf) } + let(:listener1) { observer.listener_for(file1_path) } + let(:listener2) { observer.listener_for(file2.to_path) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, "create file") do + file_path.open("wb") { |file| file.puts(line1); file.puts(line2) } + end + .then_after(0.25, "rename") do + FileUtils.mv(file1_path, file2.to_path) + end + .then_after(0.25, "write to renamed file") do + file2.open("ab") { |file| file.puts(line3) } + end + .then("wait for expectations to be met") do + wait(0.75).for{listener1.lines.size + listener2.lines.size}.to eq(3) + end + .then("quit") do + tailing.quit + end + end + + it "content is read correctly, the renamed file is not reread from scratch" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + expect(listener1.lines).to eq([line1, line2]) + expect(listener2.lines).to eq([line3]) + end + end + + context "? rotation: when an active file is renamed inside the glob and the reading lags behind" do + let(:opts) { super.merge( + :file_chunk_size => line1.bytesize.succ, + :file_chunk_count => 2 + ) } + let(:watch_dir) { directory.join("*I.log") } + let(:file_path) { directory.join("1I.log") } + let(:file2) { directory.join("2I.log") } + subject { described_class.new(conf) } + let(:listener1) { observer.listener_for(file1_path) } + let(:listener2) { observer.listener_for(file2.to_path) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, "create file") do + file_path.open("wb") { |file| 65.times{file.puts(line1)} } + end + .then_after(0.25, "rename") do + FileUtils.mv(file1_path, file2.to_path) + end + .then_after(0.25, "write to renamed file") do + file2.open("ab") { |file| file.puts(line3) } + end + .then("wait for expectations to be met") do + wait(1.25).for{listener1.lines.size + listener2.lines.size}.to eq(66) + end + .then("quit") do + tailing.quit + end + end + + it "content is read correctly, the renamed file is not reread from scratch" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + expect(listener2.lines.last).to eq(line3) + end + end + + context "? 
rotation: when a not active file is rotated outside the glob before the file is read" do + let(:opts) { super.merge( + :close_older => 3600, + :max_active => 1, + :file_sort_by => "path" + ) } + let(:watch_dir) { directory.join("*J.log") } + let(:file_path) { directory.join("1J.log") } + let(:file2) { directory.join("2J.log") } + let(:file3) { directory.join("2J.log.1") } + let(:listener1) { observer.listener_for(file1_path) } + let(:listener2) { observer.listener_for(file2.to_path) } + let(:listener3) { observer.listener_for(file3.to_path) } + subject { described_class.new(conf) } + let(:actions) do + RSpec::Sequencing + .run_after(0.25, "create file") do + file_path.open("wb") { |file| 65.times{file.puts(line1)} } + file2.open("wb") { |file| 65.times{file.puts(line1)} } + end + .then_after(0.25, "rename") do + FileUtils.mv(file2.to_path, file3.to_path) + end + .then("wait for expectations to be met") do + wait(1.25).for{listener1.lines.size}.to eq(65) + end + .then("quit") do + tailing.quit + end + end + + it "file 1 content is read correctly, the renamed file 2 is not read at all" do + actions.activate_quietly + tailing.watch_this(watch_dir.to_path) + tailing.subscribe(observer) + actions.assert_no_errors + expect(listener2.lines.size).to eq(0) + expect(listener3.lines.size).to eq(0) + end + end + end +end diff --git a/spec/filewatch/spec_helper.rb b/spec/filewatch/spec_helper.rb index 0b8bfb7..f074133 100644 --- a/spec/filewatch/spec_helper.rb +++ b/spec/filewatch/spec_helper.rb @@ -1,7 +1,8 @@ # encoding: utf-8 require "rspec_sequencing" -require 'rspec/wait' +# require 'rspec/wait' require "logstash/devutils/rspec/spec_helper" +require "concurrent" require "timecop" def formatted_puts(text) @@ -24,9 +25,32 @@ def formatted_puts(text) end end +require_relative "../helpers/rspec_wait_handler_helper" unless defined? RSPEC_WAIT_HANDLER_PATCHED +require_relative "../helpers/logging_level_helper" unless defined? LOG_AT_HANDLED + require 'filewatch/bootstrap' module FileWatch + class DummyIO + def stat + self + end + def ino + 23456 + end + def size + 65535 + end + def mtime + Time.now + end + def dev_major + 1 + end + def dev_minor + 5 + end + end class DummyFileReader def initialize(read_size, iterations) @@ -34,6 +58,7 @@ def initialize(read_size, iterations) @iterations = iterations @closed = false @accumulated = 0 + @io = DummyIO.new end def file_seek(*) end @@ -43,6 +68,9 @@ def close() def closed? @closed end + def to_io + @io + end def sysread(amount) @accumulated += amount if @accumulated > @read_size * @iterations @@ -67,7 +95,7 @@ def self.make_fixture_current(path, time = Time.now) class TracerBase def initialize - @tracer = [] + @tracer = Concurrent::Array.new end def trace_for(symbol) @@ -91,8 +119,8 @@ class Listener def initialize(path) @path = path - @lines = [] - @calls = [] + @lines = Concurrent::Array.new + @calls = Concurrent::Array.new end def add_lines(lines) @@ -134,7 +162,7 @@ def initialize(combined_lines = nil) else lambda{|k| Listener.new(k).add_lines(combined_lines) } end - @listeners = Hash.new {|hash, key| hash[key] = listener_proc.call(key) } + @listeners = Concurrent::Hash.new {|hash, key| hash[key] = listener_proc.call(key) } end def listener_for(path) @@ -145,8 +173,3 @@ def clear @listeners.clear; end end end - -ENV["LOG_AT"].tap do |level| - LogStash::Logging::Logger::configure_logging(level) unless level.nil? 
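# NOTE: spec_helper switches the listener state (lines, calls) and the
# listener map to concurrent-ruby collections because the filewatch thread
# appends to them while the example thread polls them via wait(). The
# default-proc pattern TestObserver uses, in isolation:
require 'concurrent'

listeners = Concurrent::Hash.new { |hash, key| hash[key] = Concurrent::Array.new }
listeners["/var/log/a.log"] << :open     # first access builds the entry
puts listeners["/var/log/a.log"].inspect # => [:open]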
-end - diff --git a/spec/filewatch/tailing_spec.rb b/spec/filewatch/tailing_spec.rb index 5e082a6..30998ea 100644 --- a/spec/filewatch/tailing_spec.rb +++ b/spec/filewatch/tailing_spec.rb @@ -5,42 +5,45 @@ module FileWatch describe Watch do - before(:all) do - @thread_abort = Thread.abort_on_exception - Thread.abort_on_exception = true - end - - after(:all) do - Thread.abort_on_exception = @thread_abort - end - let(:directory) { Stud::Temporary.directory } - let(:watch_dir) { ::File.join(directory, "*.log") } - let(:file_path) { ::File.join(directory, "1.log") } + let(:watch_dir) { ::File.join(directory, "*#{suffix}.log") } + let(:file_path) { ::File.join(directory, "1#{suffix}.log") } + let(:file_path2) { ::File.join(directory, "2#{suffix}.log") } + let(:file_path3) { ::File.join(directory, "3#{suffix}.log") } let(:max) { 4095 } let(:stat_interval) { 0.1 } let(:discover_interval) { 4 } - let(:start_new_files_at) { :beginning } + let(:start_new_files_at) { :end } let(:sincedb_path) { ::File.join(directory, "tailing.sdb") } let(:opts) do { :stat_interval => stat_interval, :start_new_files_at => start_new_files_at, :max_active => max, - :delimiter => "\n", :discover_interval => discover_interval, :sincedb_path => sincedb_path + :delimiter => "\n", :discover_interval => discover_interval, :sincedb_path => sincedb_path, + :file_sort_by => "path" } end let(:observer) { TestObserver.new } + let(:listener1) { observer.listener_for(file_path) } + let(:listener2) { observer.listener_for(file_path2) } + let(:listener3) { observer.listener_for(file_path3) } let(:tailing) { ObservingTail.new(opts) } + before do + directory + wait(1.0).for{Dir.exist?(directory)}.to eq(true) + end + after do FileUtils.rm_rf(directory) + wait(1.0).for{Dir.exist?(directory)}.to eq(false) end describe "max open files (set to 1)" do let(:max) { 1 } - let(:file_path2) { File.join(directory, "2.log") } let(:wait_before_quit) { 0.15 } let(:stat_interval) { 0.01 } let(:discover_interval) { 4 } + let(:start_new_files_at) { :beginning } let(:actions) do RSpec::Sequencing .run_after(wait_before_quit, "quit after a short time") do @@ -51,94 +54,110 @@ module FileWatch before do ENV["FILEWATCH_MAX_FILES_WARN_INTERVAL"] = "0" File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } - File.open(file_path2, "wb") { |file| file.write("lineA\nlineB\n") } + File.open(file_path2, "wb") { |file| file.write("line-A\nline-B\n") } end context "when max_active is 1" do + let(:suffix) { "A" } it "without close_older set, opens only 1 file" do - actions.activate + actions.activate_quietly + # create files before first discovery, they will be read from the end tailing.watch_this(watch_dir) tailing.subscribe(observer) + actions.assert_no_errors expect(tailing.settings.max_active).to eq(max) - file1_calls = observer.listener_for(file_path).calls - file2_calls = observer.listener_for(file_path2).calls - # file glob order is OS dependent - if file1_calls.empty? 
- expect(observer.listener_for(file_path2).lines).to eq(["lineA", "lineB"]) - expect(file2_calls).to eq([:open, :accept, :accept]) - else - expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"]) - expect(file1_calls).to eq([:open, :accept, :accept]) - expect(file2_calls).to be_empty - end + expect(listener1.lines).to eq(["line1", "line2"]) + expect(listener1.calls).to eq([:open, :accept, :accept]) + expect(listener2.calls).to be_empty end end context "when close_older is set" do let(:wait_before_quit) { 0.8 } - let(:opts) { super.merge(:close_older => 0.15, :max_active => 1, :stat_interval => 0.1) } + let(:opts) { super.merge(:close_older => 0.1, :max_active => 1, :stat_interval => 0.1) } + let(:suffix) { "B" } it "opens both files" do - actions.activate + actions.activate_quietly tailing.watch_this(watch_dir) tailing.subscribe(observer) + actions.assert_no_errors expect(tailing.settings.max_active).to eq(1) - filelistener_1 = observer.listener_for(file_path) - filelistener_2 = observer.listener_for(file_path2) - expect(filelistener_2.calls).to eq([:open, :accept, :accept, :timed_out]) - expect(filelistener_2.lines).to eq(["lineA", "lineB"]) - expect(filelistener_1.calls).to eq([:open, :accept, :accept, :timed_out]) - expect(filelistener_1.lines).to eq(["line1", "line2"]) + expect(listener2.calls).to eq([:open, :accept, :accept, :timed_out]) + expect(listener2.lines).to eq(["line-A", "line-B"]) + expect(listener1.calls).to eq([:open, :accept, :accept, :timed_out]) + expect(listener1.lines).to eq(["line1", "line2"]) end end end - context "when watching a directory with files" do - let(:start_new_files_at) { :beginning } + context "when watching a directory with files, exisiting content is skipped" do + let(:suffix) { "C" } let(:actions) do - RSpec::Sequencing.run_after(0.45, "quit after a short time") do - tailing.quit - end + RSpec::Sequencing + .run("create file") do + File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") } + end + .then_after(0.1, "begin watching") do + tailing.watch_this(watch_dir) + end + .then_after(0.05, "add content") do + File.open(file_path, "ab") { |file| file.write("line1\nline2\n") } + end + .then("wait") do + wait(0.75).for{listener1.lines.size}.to eq(2) + end + .then("quit") do + tailing.quit + end end - it "the file is read" do - File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } - actions.activate + it "only the new content is read" do + actions.activate_quietly tailing.watch_this(watch_dir) tailing.subscribe(observer) - expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept]) - expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"]) + actions.assert_no_errors + expect(listener1.calls).to eq([:open, :accept, :accept]) + expect(listener1.lines).to eq(["line1", "line2"]) end end context "when watching a directory without files and one is added" do - let(:start_new_files_at) { :beginning } - before do - tailing.watch_this(watch_dir) + let(:suffix) { "D" } + let(:actions) do RSpec::Sequencing - .run_after(0.25, "create file") do + .run("begin watching") do + tailing.watch_this(watch_dir) + end + .then_after(0.1, "create file") do File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } end - .then_after(0.45, "quit after a short time") do + .then("wait") do + wait(0.75).for{listener1.lines.size}.to eq(2) + end + .then("quit") do tailing.quit end end - it "the file is read" do + it "the file is read from the beginning" do + actions.activate_quietly 
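# NOTE: suffixes C and D pin down the discovery-time rule from
# WatchedFile#position_for_new_sincedb_value: a file present at the first
# discovery honours start_new_files_at (:end here, so pre-existing content is
# skipped), while a file appearing later is always read from byte 0. Restated
# standalone:
def initial_read_position(seen_in_first_discovery, start_new_files_at, size)
  return 0 unless seen_in_first_discovery
  start_new_files_at == :beginning ? 0 : size
end

puts initial_read_position(true,  :end, 12) # => 12, skip what was already there
puts initial_read_position(false, :end, 12) # => 0, new file, tail from the top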
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept])
-      expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"])
+      actions.assert_no_errors
+      expect(listener1.calls).to eq([:open, :accept, :accept])
+      expect(listener1.lines).to eq(["line1", "line2"])
     end
   end

-  describe "given a previously discovered file" do
+  context "given a previously discovered file" do
     # these tests rely on the fact that the 'filepath' does not exist on disk
     # it simulates that the user deleted the file
     # so when a stat is taken on the file an error is raised
+    let(:suffix) { "E" }
     let(:quit_after) { 0.2 }
-    let(:stat) { double("stat", :size => 100, :ctime => Time.now, :mtime => Time.now, :ino => 234567, :dev_major => 3, :dev_minor => 2) }
+    let(:stat) { double("stat", :size => 100, :modified_at => Time.now.to_f, :identifier => nil, :inode => 234567, :inode_struct => InodeStruct.new("234567", 1, 5)) }
     let(:watched_file) { WatchedFile.new(file_path, stat, tailing.settings) }
-
     before do
+      allow(stat).to receive(:restat).and_raise(Errno::ENOENT)
       tailing.watch.watched_files_collection.add(watched_file)
       watched_file.initial_completed
     end

@@ -150,7 +169,7 @@ module FileWatch
         RSpec::Sequencing.run_after(quit_after, "quit") { tailing.quit }
         tailing.subscribe(observer)
         expect(tailing.watch.watched_files_collection).to be_empty
-        expect(observer.listener_for(file_path).calls).to eq([:delete])
+        expect(listener1.calls).to eq([:delete])
       end
     end

@@ -160,7 +179,7 @@ module FileWatch
         RSpec::Sequencing.run_after(quit_after, "quit") { tailing.quit }
         tailing.subscribe(observer)
         expect(tailing.watch.watched_files_collection).to be_empty
-        expect(observer.listener_for(file_path).calls).to eq([:delete])
+        expect(listener1.calls).to eq([:delete])
       end
     end

@@ -170,7 +189,7 @@ module FileWatch
         RSpec::Sequencing.run_after(quit_after, "quit") { tailing.quit }
         tailing.subscribe(observer)
         expect(tailing.watch.watched_files_collection).to be_empty
-        expect(observer.listener_for(file_path).calls).to eq([:delete])
+        expect(listener1.calls).to eq([:delete])
       end
     end

@@ -180,172 +199,214 @@ module FileWatch
         RSpec::Sequencing.run_after(quit_after, "quit") { tailing.quit }
         tailing.subscribe(observer)
         expect(tailing.watch.watched_files_collection).to be_empty
-        expect(observer.listener_for(file_path).calls).to eq([:delete])
+        expect(listener1.calls).to eq([:delete])
       end
     end
   end

   context "when a processed file shrinks" do
-    let(:discover_interval) { 100 }
-    before do
+    let(:discover_interval) { 1 }
+    let(:suffix) { "F" }
+    let(:actions) do
       RSpec::Sequencing
-        .run("create file") do
+        .run_after(0.1, "start watching") do
+          tailing.watch_this(watch_dir)
+        end
+        .then_after(0.1, "create file") do
+          # create file after first discovery, will be read from the start
           File.open(file_path, "wb") { |file| file.write("line1\nline2\nline3\nline4\n") }
         end
-        .then_after(0.25, "start watching after files are written") do
-          tailing.watch_this(watch_dir)
+        .then("wait for initial lines to be read") do
+          wait(0.8).for{listener1.lines.size}.to eq(4), "listener1.lines.size not eq 4"
         end
         .then_after(0.25, "truncate file and write new content") do
           File.truncate(file_path, 0)
-          File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") }
+          File.open(file_path, "ab") { |file| file.write("lineA\nlineB\n") }
+          wait(0.5).for{listener1.lines.size}.to eq(6), "listener1.lines.size not eq 6"
         end
-        .then_after(0.25, "quit after a short time") do
+        .then("quit") do
          tailing.quit
        end
    end

    it "new changes to the shrunk file are read from the beginning" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :accept, :accept, :accept, :accept])
-      expect(observer.listener_for(file_path).lines).to eq(["line1", "line2", "line3", "line4", "lineA", "lineB"])
+      actions.assert_no_errors
+      expect(listener1.calls).to eq([:open, :accept, :accept, :accept, :accept, :accept, :accept])
+      expect(listener1.lines).to eq(["line1", "line2", "line3", "line4", "lineA", "lineB"])
     end
   end

-  context "when watching a directory with files and a file is renamed to not match glob" do
+  context "when watching a directory with files and a file is renamed to not match glob", :unix => true do
+    let(:suffix) { "G" }
     let(:new_file_path) { file_path + ".old" }
-    before do
+    let(:new_file_listener) { observer.listener_for(new_file_path) }
+    let(:actions) do
       RSpec::Sequencing
-        .run("create file") do
-          File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
-        end
-        .then_after(0.25, "start watching after files are written") do
+        .run("start watching") do
           tailing.watch_this(watch_dir)
         end
+        .then_after(0.1, "create file") do
+          # create file after first discovery, will be read from the beginning
+          File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+        end
         .then_after(0.55, "rename file") do
           FileUtils.mv(file_path, new_file_path)
         end
         .then_after(0.55, "then write to renamed file") do
           File.open(new_file_path, "ab") { |file| file.write("line3\nline4\n") }
+          wait(0.5).for{listener1.lines.size}.to eq(2), "listener1.lines.size not eq(2)"
         end
-        .then_after(0.45, "quit after a short time") do
+        .then_after(0.1, "quit") do
           tailing.quit
         end
     end

     it "changes to the renamed file are not read" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :delete])
-      expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"])
-      expect(observer.listener_for(new_file_path).calls).to eq([])
-      expect(observer.listener_for(new_file_path).lines).to eq([])
+      actions.assert_no_errors
+      expect(listener1.calls).to eq([:open, :accept, :accept, :delete])
+      expect(listener1.lines).to eq(["line1", "line2"])
+      expect(new_file_listener.calls).to eq([])
+      expect(new_file_listener.lines).to eq([])
     end
   end

-  context "when watching a directory with files and a file is renamed to match glob" do
-    let(:new_file_path) { file_path + "2.log" }
+  context "when watching a directory with files and a file is renamed to match glob", :unix => true do
+    let(:suffix) { "H" }
     let(:opts) { super.merge(:close_older => 0) }
-    before do
+    let(:listener2) { observer.listener_for(file_path2) }
+    let(:actions) do
       RSpec::Sequencing
-        .run("create file") do
+        .run("file created") do
+          # create file before first discovery, will be read from the end
           File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
         end
         .then_after(0.15, "start watching after files are written") do
           tailing.watch_this(watch_dir)
         end
-        .then_after(0.25, "rename file") do
-          FileUtils.mv(file_path, new_file_path)
+        .then("wait") do
+          wait(0.5).for{listener1.calls.last}.to eq(:timed_out)
         end
-        .then("then write to renamed file") do
-          File.open(new_file_path, "ab") { |file| file.write("line3\nline4\n") }
+        .then("rename file") do
+          FileUtils.mv(file_path, file_path2)
+        end
+        .then_after(0.1, "then write to renamed file") do
+          File.open(file_path2, "ab") { |file| file.write("line3\nline4\n") }
         end
-        .then_after(0.55, "quit after a short time") do
+        .then_after(0.1, "wait for lines") do
+          wait(0.5).for{listener2.lines.size}.to eq(2)
+        end
+        .then_after(0.1, "quit") do
           tailing.quit
         end
     end

     it "the first set of lines are not re-read" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"])
-      expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out, :delete])
-      expect(observer.listener_for(new_file_path).lines).to eq(["line3", "line4"])
-      expect(observer.listener_for(new_file_path).calls).to eq([:open, :accept, :accept, :timed_out])
+      actions.assert_no_errors
+      expect(listener1.lines).to eq([])
+      expect(listener1.calls).to eq([:open, :timed_out, :delete])
+      expect(listener2.lines).to eq(["line3", "line4"])
+      expect(listener2.calls).to eq([:open, :accept, :accept, :timed_out])
     end
   end

   context "when watching a directory with files and data is appended" do
-    before do
+    let(:suffix) { "I" }
+    let(:actions) do
       RSpec::Sequencing
-        .run("create file") do
+        .run("file created") do
           File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
         end
-        .then_after(0.25, "start watching after file is written") do
+        .then_after(0.15, "start watching after file is written") do
           tailing.watch_this(watch_dir)
         end
         .then_after(0.45, "append more lines to the file") do
           File.open(file_path, "ab") { |file| file.write("line3\nline4\n") }
+          wait(0.5).for{listener1.lines.size}.to eq(2)
         end
-        .then_after(0.45, "quit after a short time") do
+        .then_after(0.1, "quit") do
           tailing.quit
         end
     end

-    it "appended lines are read after an EOF" do
+    it "only the appended lines are read" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :accept, :accept])
-      expect(observer.listener_for(file_path).lines).to eq(["line1", "line2", "line3", "line4"])
+      actions.assert_no_errors
+      expect(listener1.calls).to eq([:open, :accept, :accept])
+      expect(listener1.lines).to eq(["line3", "line4"])
     end
   end

   context "when close older expiry is enabled" do
     let(:opts) { super.merge(:close_older => 1) }
-    before do
-      RSpec::Sequencing
-        .run("create file") do
-          File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
-        end
-        .then("start watching before file ages more than close_older") do
-          tailing.watch_this(watch_dir)
-        end
-        .then_after(2.1, "quit after allowing time to close the file") do
-          tailing.quit
-        end
+    let(:suffix) { "J" }
+    let(:actions) do
+      RSpec::Sequencing.run("create file") do
+        File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+      end
+      .then("watch and wait") do
+        tailing.watch_this(watch_dir)
+        wait(1.25).for{listener1.calls}.to eq([:open, :timed_out])
+      end
+      .then("quit") do
+        tailing.quit
+      end
     end

-    it "lines are read and the file times out" do
+    it "existing lines are not read and the file times out" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out])
-      expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"])
+      actions.assert_no_errors
+      expect(listener1.lines).to eq([])
     end
   end
   context "when close older expiry is enabled and after timeout the file is appended-to" do
-    let(:opts) { super.merge(:close_older => 1) }
-    before do
+    let(:opts) { super.merge(:close_older => 0.5) }
+    let(:suffix) { "K" }
+    let(:actions) do
       RSpec::Sequencing
-        .run("create file") do
+        .run("start watching") do
+          tailing.watch_this(watch_dir)
+        end
+        .then("create file") do
           File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
         end
-        .then("start watching before file ages more than close_older") do
-          tailing.watch_this(watch_dir)
+        .then("wait for file to be read") do
+          wait(0.5).for{listener1.calls}.to eq([:open, :accept, :accept]), "file is not read"
         end
-        .then_after(2.1, "append more lines to file after file ages more than close_older") do
+        .then("wait for file to be read and time out") do
+          wait(0.75).for{listener1.calls}.to eq([:open, :accept, :accept, :timed_out]), "file did not timeout the first time"
+        end
+        .then("append more lines to file after file ages more than close_older") do
           File.open(file_path, "ab") { |file| file.write("line3\nline4\n") }
         end
-        .then_after(2.1, "quit after allowing time to close the file") do
+        .then("wait for last timeout") do
+          wait(0.75).for{listener1.calls}.to eq([:open, :accept, :accept, :timed_out, :open, :accept, :accept, :timed_out]), "file did not timeout the second time"
+        end
+        .then("quit") do
           tailing.quit
         end
     end

     it "all lines are read" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out, :open, :accept, :accept, :timed_out])
-      expect(observer.listener_for(file_path).lines).to eq(["line1", "line2", "line3", "line4"])
+      actions.assert_no_errors
+      expect(listener1.lines).to eq(["line1", "line2", "line3", "line4"])
     end
   end

   context "when ignore older expiry is enabled and all files are already expired" do
     let(:opts) { super.merge(:ignore_older => 1) }
-    before do
+    let(:suffix) { "L" }
+    let(:actions) do
       RSpec::Sequencing
         .run("create file older than ignore_older and watch") do
           File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
@@ -358,60 +419,112 @@ module FileWatch
     end

     it "no files are read" do
+      actions.activate_quietly
+      tailing.subscribe(observer)
+      expect(listener1.calls).to eq([])
+      expect(listener1.lines).to eq([])
+    end
+  end
+
+  context "when a file is renamed before it gets activated", :unix => true do
+    let(:max) { 1 }
+    let(:opts) { super.merge(:file_chunk_count => 8, :file_chunk_size => 6, :close_older => 0.1, :discover_interval => 6) }
+    let(:suffix) { "M" }
+    let(:start_new_files_at) { :beginning } # we are creating files and sincedb record before hand
+    let(:actions) do
+      RSpec::Sequencing
+        .run("create files and sincedb record") do
+          File.open(file_path, "wb") { |file| 32.times{file.write("line1\n")} }
+          File.open(file_path2, "wb") { |file| file.write("line2\n") }
+          # synthesize a sincedb record
+          stat = File.stat(file_path2)
+          record = [stat.ino.to_s, stat.dev_major.to_s, stat.dev_minor.to_s, "0", "1526220348.083179", file_path2]
+          File.open(sincedb_path, "wb") { |file| file.puts(record.join(" ")) }
+        end
+        .then_after(0.2, "watch") do
+          tailing.watch_this(watch_dir)
+        end
+        .then_after(0.1, "rename file 2") do
+          FileUtils.mv(file_path2, file_path3)
+        end
+        .then("wait") do
+          wait(2).for do
+            listener1.lines.size == 32 && listener2.calls == [:delete] && listener3.calls == [:open, :accept, :timed_out]
+          end.to eq(true), "listener1.lines != 32 or listener2.calls != [:delete] or listener3.calls != [:open, :accept, :timed_out]"
+        end
+        .then("quit") do
+          tailing.quit
+        end
+    end
+
+    it "files are read correctly" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([])
-      expect(observer.listener_for(file_path).lines).to eq([])
+      actions.assert_no_errors
+      expect(listener2.lines).to eq([])
+      expect(listener3.lines).to eq(["line2"])
     end
   end

   context "when ignore_older is less than close_older and all files are not expired" do
-    let(:opts) { super.merge(:ignore_older => 1, :close_older => 1.5) }
-    before do
+    let(:opts) { super.merge(:ignore_older => 1, :close_older => 1.1) }
+    let(:suffix) { "N" }
+    let(:start_new_files_at) { :beginning }
+    let(:actions) do
       RSpec::Sequencing
-        .run("create file") do
+        .run_after(0.1, "file created") do
           File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
         end
         .then("start watching before file age reaches ignore_older") do
           tailing.watch_this(watch_dir)
         end
-        .then_after(1.75, "quit after allowing time to close the file") do
+        .then("wait for lines") do
+          wait(1.5).for{listener1.calls}.to eq([:open, :accept, :accept, :timed_out])
+        end
+        .then("quit") do
           tailing.quit
         end
     end

     it "reads lines normally" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out])
-      expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"])
+      actions.assert_no_errors
+      expect(listener1.lines).to eq(["line1", "line2"])
     end
   end

   context "when ignore_older is less than close_older and all files are expired" do
     let(:opts) { super.merge(:ignore_older => 10, :close_older => 1) }
-    before do
+    let(:suffix) { "P" }
+    let(:actions) do
       RSpec::Sequencing
-        .run("create file older than ignore_older and watch") do
+        .run("creating file") do
           File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+        end
+        .then("making it older by 15 seconds and watch") do
           FileWatch.make_file_older(file_path, 15)
           tailing.watch_this(watch_dir)
         end
-        .then_after(1.5, "quit after allowing time to check the files") do
+        .then_after(0.75, "quit after allowing time to check the files") do
          tailing.quit
        end
    end

    it "no files are read" do
+      actions.activate_quietly
      tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([])
-      expect(observer.listener_for(file_path).lines).to eq([])
+      expect(listener1.calls).to eq([])
+      expect(listener1.lines).to eq([])
    end
  end

   context "when ignore older and close older expiry is enabled and after timeout the file is appended-to" do
-    let(:opts) { super.merge(:ignore_older => 20, :close_older => 1) }
-    before do
+    let(:opts) { super.merge(:ignore_older => 20, :close_older => 0.5) }
+    let(:suffix) { "Q" }
+    let(:actions) do
       RSpec::Sequencing
-        .run("create file older than ignore_older and watch") do
+        .run("file older than ignore_older created and watching") do
           File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
           FileWatch.make_file_older(file_path, 25)
           tailing.watch_this(watch_dir)
@@ -419,37 +532,46 @@ module FileWatch
         .then_after(0.15, "append more lines to file after file ages more than ignore_older") do
           File.open(file_path, "ab") { |file| file.write("line3\nline4\n") }
         end
-        .then_after(1.25, "quit after allowing time to close the file") do
+        .then("wait for lines") do
+          wait(2).for{listener1.calls}.to eq([:open, :accept, :accept, :timed_out])
+        end
+        .then_after(0.1, "quit after allowing time to close the file") do
           tailing.quit
         end
     end

     it "reads the added lines only" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).lines).to eq(["line3", "line4"])
-      expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out])
+      actions.assert_no_errors
+      expect(listener1.lines).to eq(["line3", "line4"])
     end
   end

   context "when a non default delimiter is specified and it is not in the content" do
     let(:opts) { super.merge(:ignore_older => 20, :close_older => 1, :delimiter => "\nø") }
-    before do
+    let(:suffix) { "R" }
+    let(:actions) do
       RSpec::Sequencing
-        .run("create file") do
+        .run("start watching") do
+          tailing.watch_this(watch_dir)
+        end
+        .then("creating file") do
           File.open(file_path, "wb") { |file| file.write("line1\nline2") }
         end
-        .then("start watching before file ages more than close_older") do
-          tailing.watch_this(watch_dir)
+        .then("wait for :timeout") do
+          wait(2).for{listener1.calls}.to eq([:open, :timed_out])
         end
-        .then_after(2.1, "quit after allowing time to close the file") do
+        .then_after(0.75, "quit after allowing time to close the file") do
           tailing.quit
         end
     end

     it "the file is opened, data is read, but no lines are found, the file times out" do
+      actions.activate_quietly
       tailing.subscribe(observer)
-      expect(observer.listener_for(file_path).calls).to eq([:open, :timed_out])
-      expect(observer.listener_for(file_path).lines).to eq([])
+      actions.assert_no_errors
+      expect(listener1.lines).to eq([])
       sincedb_record_fields = File.read(sincedb_path).split(" ")
       position_field_index = 3 # tailing, no delimiter, we are expecting one, if it grows we read from the start.
@@ -459,5 +581,3 @@ module FileWatch
     end
   end
 end
-
-
diff --git a/spec/filewatch/watched_file_spec.rb b/spec/filewatch/watched_file_spec.rb
index adf0a7e..a532ac1 100644
--- a/spec/filewatch/watched_file_spec.rb
+++ b/spec/filewatch/watched_file_spec.rb
@@ -8,9 +8,9 @@ module FileWatch

   context 'Given two instances of the same file' do
     it 'their sincedb_keys should equate' do
-      wf_key1 = WatchedFile.new(pathname, pathname.stat, Settings.new).sincedb_key
+      wf_key1 = WatchedFile.new(pathname, PathStatClass.new(pathname), Settings.new).sincedb_key
       hash_db = { wf_key1 => 42 }
-      wf_key2 = WatchedFile.new(pathname, pathname.stat, Settings.new).sincedb_key
+      wf_key2 = WatchedFile.new(pathname, PathStatClass.new(pathname), Settings.new).sincedb_key
       expect(wf_key1).to eq(wf_key2)
       expect(wf_key1).to eql(wf_key2)
       expect(wf_key1.hash).to eq(wf_key2.hash)
@@ -20,7 +20,7 @@ module FileWatch

   context 'Given a barrage of state changes' do
     it 'only the previous N state changes are remembered' do
-      watched_file = WatchedFile.new(pathname, pathname.stat, Settings.new)
+      watched_file = WatchedFile.new(pathname, PathStatClass.new(pathname), Settings.new)
       watched_file.ignore
       watched_file.watch
       watched_file.activate
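Note on the spec pattern used throughout this patch: the reworked specs build a deferred RSpec::Sequencing chain in a let(:actions) block, activate it quietly before running the subject, and assert afterwards that no step raised. A minimal sketch of that shape, assuming the rspec-sequencing helper these specs use (file_path, listener1, tailing and observer stand in for the specs' own helpers; the comments are my reading of the API, not documentation from this patch):

    let(:actions) do
      RSpec::Sequencing
        .run("create file") do
          File.open(file_path, "wb") { |file| file.write("line1\n") }
        end
        .then_after(0.1, "wait for the line to be read") do
          # the trailing string is the failure message used if the wait expires
          wait(0.5).for { listener1.lines.size }.to eq(1), "line was not read"
        end
        .then("quit") do
          tailing.quit
        end
    end

    it "reads the line" do
      actions.activate_quietly     # start the chain without per-step logging
      tailing.subscribe(observer)  # blocks until tailing.quit is called
      actions.assert_no_errors     # re-raise any failure captured in the chain
    end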
InodeStruct.new("234568", 3, 2)) } + let(:stat3) { double("stat3", :size => 100, :modified_at => time, :identifier => nil, :inode => 234569, :inode_struct => InodeStruct.new("234569", 3, 2)) } let(:wf1) { WatchedFile.new("/var/log/z.log", stat1, Settings.new) } let(:wf2) { WatchedFile.new("/var/log/m.log", stat2, Settings.new) } let(:wf3) { WatchedFile.new("/var/log/a.log", stat3, Settings.new) } diff --git a/spec/filewatch/winhelper_spec.rb b/spec/filewatch/winhelper_spec.rb index 9fb2b78..a0d874c 100644 --- a/spec/filewatch/winhelper_spec.rb +++ b/spec/filewatch/winhelper_spec.rb @@ -3,7 +3,7 @@ require "fileutils" if Gem.win_platform? - require "lib/filewatch/winhelper" + require "filewatch/winhelper" describe Winhelper do let(:path) { Stud::Temporary.file.path } @@ -13,11 +13,10 @@ end it "return a unique file identifier" do - volume_serial, file_index_low, file_index_high = Winhelper.GetWindowsUniqueFileIdentifier(path).split("").map(&:to_i) + identifier = Winhelper.identifier_from_path(path) - expect(volume_serial).not_to eq(0) - expect(file_index_low).not_to eq(0) - expect(file_index_high).not_to eq(0) + expect(identifier).not_to eq("unknown") + expect(identifier.count("-")).to eq(2) end end end diff --git a/spec/helpers/logging_level_helper.rb b/spec/helpers/logging_level_helper.rb new file mode 100644 index 0000000..0ecc433 --- /dev/null +++ b/spec/helpers/logging_level_helper.rb @@ -0,0 +1,8 @@ +# encoding: utf-8 + +ENV["LOG_AT"].tap do |level| + if !level.nil? + LogStash::Logging::Logger::configure_logging(level) + LOG_AT_HANDLED = true + end +end diff --git a/spec/helpers/rspec_wait_handler_helper.rb b/spec/helpers/rspec_wait_handler_helper.rb new file mode 100644 index 0000000..2ad595b --- /dev/null +++ b/spec/helpers/rspec_wait_handler_helper.rb @@ -0,0 +1,38 @@ +# encoding: utf-8 + +module RSpec + module Wait + module Handler + def handle_matcher(target, *args, &block) + # there is a similar patch in the rspec-wait repo since Nov, 19 2017 + # it does not look like the author is interested in the change. + # - do not use Ruby Timeout + count = RSpec.configuration.wait_timeout.fdiv(RSpec.configuration.wait_delay).ceil + failure = nil + count.times do + begin + actual = target.respond_to?(:call) ? target.call : target + super(actual, *args, &block) + failure = nil + rescue RSpec::Expectations::ExpectationNotMetError => failure + sleep RSpec.configuration.wait_delay + end + break if failure.nil? + end + raise failure unless failure.nil? + end + end + + # From: https://github.com/rspec/rspec-expectations/blob/v3.0.0/lib/rspec/expectations/handler.rb#L44-L63 + class PositiveHandler < RSpec::Expectations::PositiveExpectationHandler + extend Handler + end + + # From: https://github.com/rspec/rspec-expectations/blob/v3.0.0/lib/rspec/expectations/handler.rb#L66-L93 + class NegativeHandler < RSpec::Expectations::NegativeExpectationHandler + extend Handler + end + end +end + +RSPEC_WAIT_HANDLER_PATCHED = true diff --git a/spec/helpers/spec_helper.rb b/spec/helpers/spec_helper.rb index dc725a6..70bfd8e 100644 --- a/spec/helpers/spec_helper.rb +++ b/spec/helpers/spec_helper.rb @@ -18,7 +18,7 @@ def self.make_fixture_current(path, time = Time.now) class TracerBase def initialize - @tracer = [] + @tracer = Concurrent::Array.new end def trace_for(symbol) @@ -54,6 +54,9 @@ def clone end end +require_relative "rspec_wait_handler_helper" unless defined? RSPEC_WAIT_HANDLER_PATCHED +require_relative "logging_level_helper" unless defined? 
LOG_AT_HANDLED + unless RSpec::Matchers.method_defined?(:receive_call_and_args) RSpec::Matchers.define(:receive_call_and_args) do |m, args| match do |actual| @@ -66,3 +69,6 @@ def clone end end +ENV["LOG_AT"].tap do |level| + LogStash::Logging::Logger::configure_logging(level) unless level.nil? +end diff --git a/spec/inputs/file_read_spec.rb b/spec/inputs/file_read_spec.rb index 67fe206..d7361bb 100644 --- a/spec/inputs/file_read_spec.rb +++ b/spec/inputs/file_read_spec.rb @@ -9,21 +9,21 @@ require "stud/temporary" require "logstash/codecs/multiline" -FILE_DELIMITER = LogStash::Environment.windows? ? "\r\n" : "\n" - describe LogStash::Inputs::File do describe "'read' mode testing with input(conf) do |pipeline, queue|" do it "should start at the beginning of an existing file and delete the file when done" do - tmpfile_path = Stud::Temporary.pathname - sincedb_path = Stud::Temporary.pathname + directory = Stud::Temporary.directory + tmpfile_path = ::File.join(directory, "A.log") + sincedb_path = ::File.join(directory, "readmode_A_sincedb.txt") + path_path = ::File.join(directory, "*.log") conf = <<-CONFIG input { file { - type => "blah" - path => "#{tmpfile_path}" + id => "blah" + path => "#{path_path}" sincedb_path => "#{sincedb_path}" - delimiter => "#{FILE_DELIMITER}" + delimiter => "|" mode => "read" file_completed_action => "delete" } @@ -31,17 +31,49 @@ CONFIG File.open(tmpfile_path, "a") do |fd| - fd.puts("hello") - fd.puts("world") + fd.write("hello|world") fd.fsync end events = input(conf) do |pipeline, queue| + wait(0.5).for{File.exist?(tmpfile_path)}.to be_falsey 2.times.collect { queue.pop } end expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world") - expect(File.exist?(tmpfile_path)).to be_falsey + end + + it "should start at the beginning of an existing file and log the file when done" do + directory = Stud::Temporary.directory + tmpfile_path = ::File.join(directory, "A.log") + sincedb_path = ::File.join(directory, "readmode_A_sincedb.txt") + path_path = ::File.join(directory, "*.log") + log_completed_path = ::File.join(directory, "A_completed.txt") + + conf = <<-CONFIG + input { + file { + id => "blah" + path => "#{path_path}" + sincedb_path => "#{sincedb_path}" + delimiter => "|" + mode => "read" + file_completed_action => "log" + file_completed_log_path => "#{log_completed_path}" + } + } + CONFIG + + File.open(tmpfile_path, "a") do |fd| + fd.write("hello|world") + fd.fsync + end + + events = input(conf) do |pipeline, queue| + wait(0.5).for{IO.read(log_completed_path)}.to match(/A\.log/) + 2.times.collect { queue.pop } + end + expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world") end end @@ -63,7 +95,6 @@ type => "blah" path => "#{tmpfile_path}" sincedb_path => "#{sincedb_path}" - delimiter => "#{FILE_DELIMITER}" mode => "read" file_completed_action => "log" file_completed_log_path => "#{log_completed_path}" @@ -72,12 +103,12 @@ CONFIG events = input(conf) do |pipeline, queue| + wait(0.5).for{IO.read(log_completed_path)}.to match(/#{file_path.to_s}/) 2.times.collect { queue.pop } end expect(events[0].get("message")).to start_with("2010-03-12 23:51") expect(events[1].get("message")).to start_with("2010-03-12 23:51") - expect(IO.read(log_completed_path)).to eq(file_path.to_s + "\n") end end @@ -86,10 +117,11 @@ let(:file_path) { fixture_dir.join('uncompressed.log') } it "the file is read and the path is logged to the `file_completed_log_path` file" do - tmpfile_path = fixture_dir.join("unc*.log") - sincedb_path = 
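The handler patch above swaps rspec-wait's Timeout-based interruption for a bounded retry loop: the target block is re-evaluated up to wait_timeout / wait_delay times, sleeping wait_delay between attempts, and the last ExpectationNotMetError is re-raised only if no attempt succeeds. An illustrative sketch of how the specs in this patch lean on it (the configure keys are rspec-wait's standard knobs; the values here are examples only, not the project's settings):

    RSpec.configure do |config|
      config.wait_timeout = 2.0  # overall budget in seconds
      config.wait_delay   = 0.25 # sleep between retries => at most 8 attempts
    end

    # Re-evaluates the block until the expectation passes or the attempts are
    # exhausted; a per-call timeout can be passed to wait(...) and a trailing
    # string becomes the failure message.
    wait(0.5).for { listener1.lines.size }.to eq(4), "lines were not read in time"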
diff --git a/spec/inputs/file_read_spec.rb b/spec/inputs/file_read_spec.rb
index 67fe206..d7361bb 100644
--- a/spec/inputs/file_read_spec.rb
+++ b/spec/inputs/file_read_spec.rb
@@ -9,21 +9,21 @@ require "stud/temporary"
 require "logstash/codecs/multiline"

-FILE_DELIMITER = LogStash::Environment.windows? ? "\r\n" : "\n"
-
 describe LogStash::Inputs::File do

   describe "'read' mode testing with input(conf) do |pipeline, queue|" do
     it "should start at the beginning of an existing file and delete the file when done" do
-      tmpfile_path = Stud::Temporary.pathname
-      sincedb_path = Stud::Temporary.pathname
+      directory = Stud::Temporary.directory
+      tmpfile_path = ::File.join(directory, "A.log")
+      sincedb_path = ::File.join(directory, "readmode_A_sincedb.txt")
+      path_path = ::File.join(directory, "*.log")

       conf = <<-CONFIG
         input {
           file {
-            type => "blah"
-            path => "#{tmpfile_path}"
+            id => "blah"
+            path => "#{path_path}"
             sincedb_path => "#{sincedb_path}"
-            delimiter => "#{FILE_DELIMITER}"
+            delimiter => "|"
             mode => "read"
             file_completed_action => "delete"
           }
         }
       CONFIG

       File.open(tmpfile_path, "a") do |fd|
-        fd.puts("hello")
-        fd.puts("world")
+        fd.write("hello|world")
         fd.fsync
       end

       events = input(conf) do |pipeline, queue|
+        wait(0.5).for{File.exist?(tmpfile_path)}.to be_falsey
         2.times.collect { queue.pop }
       end

       expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world")
-      expect(File.exist?(tmpfile_path)).to be_falsey
+    end
+
+    it "should start at the beginning of an existing file and log the file when done" do
+      directory = Stud::Temporary.directory
+      tmpfile_path = ::File.join(directory, "A.log")
+      sincedb_path = ::File.join(directory, "readmode_A_sincedb.txt")
+      path_path = ::File.join(directory, "*.log")
+      log_completed_path = ::File.join(directory, "A_completed.txt")
+
+      conf = <<-CONFIG
+        input {
+          file {
+            id => "blah"
+            path => "#{path_path}"
+            sincedb_path => "#{sincedb_path}"
+            delimiter => "|"
+            mode => "read"
+            file_completed_action => "log"
+            file_completed_log_path => "#{log_completed_path}"
+          }
+        }
+      CONFIG
+
+      File.open(tmpfile_path, "a") do |fd|
+        fd.write("hello|world")
+        fd.fsync
+      end
+
+      events = input(conf) do |pipeline, queue|
+        wait(0.5).for{IO.read(log_completed_path)}.to match(/A\.log/)
+        2.times.collect { queue.pop }
+      end
+      expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world")
     end
   end

@@ -63,7 +95,6 @@
           type => "blah"
           path => "#{tmpfile_path}"
           sincedb_path => "#{sincedb_path}"
-          delimiter => "#{FILE_DELIMITER}"
           mode => "read"
           file_completed_action => "log"
           file_completed_log_path => "#{log_completed_path}"
@@ -72,12 +103,12 @@
       CONFIG

       events = input(conf) do |pipeline, queue|
+        wait(0.5).for{IO.read(log_completed_path)}.to match(/#{file_path.to_s}/)
         2.times.collect { queue.pop }
       end

       expect(events[0].get("message")).to start_with("2010-03-12 23:51")
       expect(events[1].get("message")).to start_with("2010-03-12 23:51")
-      expect(IO.read(log_completed_path)).to eq(file_path.to_s + "\n")
     end
   end

@@ -86,10 +117,11 @@
     let(:file_path) { fixture_dir.join('uncompressed.log') }

     it "the file is read and the path is logged to the `file_completed_log_path` file" do
-      tmpfile_path = fixture_dir.join("unc*.log")
-      sincedb_path = Stud::Temporary.pathname
       FileInput.make_fixture_current(file_path.to_path)
-      log_completed_path = Stud::Temporary.pathname
+      tmpfile_path = fixture_dir.join("unc*.log")
+      directory = Stud::Temporary.directory
+      sincedb_path = ::File.join(directory, "readmode_B_sincedb.txt")
+      log_completed_path = ::File.join(directory, "B_completed.txt")

       conf = <<-CONFIG
         input {
@@ -97,7 +129,6 @@
             type => "blah"
             path => "#{tmpfile_path}"
             sincedb_path => "#{sincedb_path}"
-            delimiter => "#{FILE_DELIMITER}"
             mode => "read"
             file_completed_action => "log"
             file_completed_log_path => "#{log_completed_path}"
@@ -106,23 +137,25 @@
       CONFIG

       events = input(conf) do |pipeline, queue|
+        wait(0.5).for{IO.read(log_completed_path)}.to match(/uncompressed\.log/)
         2.times.collect { queue.pop }
       end

       expect(events[0].get("message")).to start_with("2010-03-12 23:51")
       expect(events[1].get("message")).to start_with("2010-03-12 23:51")
-      expect(IO.read(log_completed_path)).to eq(file_path.to_s + "\n")
     end
   end

   context "for a compressed file" do
     it "the file is read" do
-      tmpfile_path = fixture_dir.join("compressed.*.*")
-      sincedb_path = Stud::Temporary.pathname
       file_path = fixture_dir.join('compressed.log.gz')
       file_path2 = fixture_dir.join('compressed.log.gzip')
       FileInput.make_fixture_current(file_path.to_path)
-      log_completed_path = Stud::Temporary.pathname
+      FileInput.make_fixture_current(file_path2.to_path)
+      tmpfile_path = fixture_dir.join("compressed.*.*")
+      directory = Stud::Temporary.directory
+      sincedb_path = ::File.join(directory, "readmode_C_sincedb.txt")
+      log_completed_path = ::File.join(directory, "C_completed.txt")

       conf = <<-CONFIG
         input {
@@ -130,7 +163,6 @@
             type => "blah"
             path => "#{tmpfile_path}"
             sincedb_path => "#{sincedb_path}"
-            delimiter => "#{FILE_DELIMITER}"
             mode => "read"
             file_completed_action => "log"
             file_completed_log_path => "#{log_completed_path}"
@@ -139,6 +171,7 @@
       CONFIG

       events = input(conf) do |pipeline, queue|
+        wait(0.5).for{IO.read(log_completed_path).scan(/compressed\.log\.gz(ip)?/).size}.to eq(2)
         4.times.collect { queue.pop }
       end

@@ -146,9 +179,6 @@
       expect(events[1].get("message")).to start_with("2010-03-12 23:51")
       expect(events[2].get("message")).to start_with("2010-03-12 23:51")
       expect(events[3].get("message")).to start_with("2010-03-12 23:51")
-      logged_completions = IO.read(log_completed_path).split
-      expect(logged_completions.first).to match(/compressed\.log\.(gzip|gz)$/)
-      expect(logged_completions.last).to match(/compressed\.log\.(gzip|gz)$/)
     end
   end
 end
diff --git a/spec/inputs/file_tail_spec.rb b/spec/inputs/file_tail_spec.rb
index 369906e..1250eba 100644
--- a/spec/inputs/file_tail_spec.rb
+++ b/spec/inputs/file_tail_spec.rb
@@ -9,7 +9,7 @@

 # LogStash::Logging::Logger::configure_logging("DEBUG")

-TEST_FILE_DELIMITER = LogStash::Environment.windows? ? "\r\n" : "\n"
+TEST_FILE_DELIMITER = $/

 describe LogStash::Inputs::File do
   describe "'tail' mode testing with input(conf) do |pipeline, queue|" do
@@ -22,152 +22,159 @@
     end
   end

-  it "should start at the beginning of an existing file" do
-    tmpfile_path = Stud::Temporary.pathname
-    sincedb_path = Stud::Temporary.pathname
-
-    conf = <<-CONFIG
-      input {
-        file {
-          type => "blah"
-          path => "#{tmpfile_path}"
-          start_position => "beginning"
-          sincedb_path => "#{sincedb_path}"
-          delimiter => "#{TEST_FILE_DELIMITER}"
+  let(:directory) { Stud::Temporary.directory }
+  let(:sincedb_dir) { Stud::Temporary.directory }
+  let(:tmpfile_path) { ::File.join(directory, "#{name}.txt") }
+  let(:sincedb_path) { ::File.join(sincedb_dir, "readmode_#{name}_sincedb.txt") }
+  let(:path_path) { ::File.join(directory, "*.txt") }
+
+  context "for an existing file" do
+    let(:name) { "A" }
+    it "should start at the beginning" do
+      conf = <<-CONFIG
+        input {
+          file {
+            type => "blah"
+            path => "#{path_path}"
+            start_position => "beginning"
+            sincedb_path => "#{sincedb_path}"
+            delimiter => "#{TEST_FILE_DELIMITER}"
+          }
         }
-      }
-    CONFIG
+      CONFIG

-    File.open(tmpfile_path, "a") do |fd|
-      fd.puts("hello")
-      fd.puts("world")
-      fd.fsync
-    end
+      File.open(tmpfile_path, "a") do |fd|
+        fd.puts("hello")
+        fd.puts("world")
+        fd.fsync
+      end

-    events = input(conf) do |pipeline, queue|
-      2.times.collect { queue.pop }
+      events = input(conf) do |pipeline, queue|
+        2.times.collect { queue.pop }
+      end
+      expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world")
     end
-    expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world")
   end

-  it "should restart at the sincedb value" do
-    tmpfile_path = Stud::Temporary.pathname
-    sincedb_path = Stud::Temporary.pathname
-
-    conf = <<-CONFIG
-      input {
-        file {
-          type => "blah"
-          path => "#{tmpfile_path}"
-          start_position => "beginning"
-          sincedb_path => "#{sincedb_path}"
-          delimiter => "#{TEST_FILE_DELIMITER}"
+  context "running the input twice" do
+    let(:name) { "B" }
+    it "should restart at the sincedb value" do
+      conf = <<-CONFIG
+        input {
+          file {
+            type => "blah"
+            path => "#{path_path}"
+            start_position => "beginning"
+            sincedb_path => "#{sincedb_path}"
+            "file_sort_by" => "path"
+            delimiter => "#{TEST_FILE_DELIMITER}"
+          }
         }
-      }
-    CONFIG
+      CONFIG

-    File.open(tmpfile_path, "w") do |fd|
-      fd.puts("hello3")
-      fd.puts("world3")
-    end
+      File.open(tmpfile_path, "w") do |fd|
+        fd.puts("hello3")
+        fd.puts("world3")
+      end

-    events = input(conf) do |pipeline, queue|
-      2.times.collect { queue.pop }
-    end
+      events = input(conf) do |pipeline, queue|
+        2.times.collect { queue.pop }
+      end

-    expect(events.map{|e| e.get("message")}).to contain_exactly("hello3", "world3")
+      expect(events.map{|e| e.get("message")}).to contain_exactly("hello3", "world3")

-    File.open(tmpfile_path, "a") do |fd|
-      fd.puts("foo")
-      fd.puts("bar")
-      fd.puts("baz")
-      fd.fsync
-    end
+      File.open(tmpfile_path, "a") do |fd|
+        fd.puts("foo")
+        fd.puts("bar")
+        fd.puts("baz")
+        fd.fsync
+      end

-    events = input(conf) do |pipeline, queue|
-      3.times.collect { queue.pop }
+      events = input(conf) do |pipeline, queue|
+        3.times.collect { queue.pop }
+      end
+      messages = events.map{|e| e.get("message")}
+      expect(messages).to contain_exactly("foo", "bar", "baz")
     end
-    messages = events.map{|e| e.get("message")}
-    expect(messages).to contain_exactly("foo", "bar", "baz")
   end

-  it "should not overwrite existing path and host fields" do
-    tmpfile_path = Stud::Temporary.pathname
-    sincedb_path = Stud::Temporary.pathname
-
-    conf = <<-CONFIG
-      input {
-        file {
-          type => "blah"
-          path => "#{tmpfile_path}"
-          start_position => "beginning"
-          sincedb_path => "#{sincedb_path}"
-          delimiter => "#{TEST_FILE_DELIMITER}"
-          codec => "json"
+  context "when path and host fields exist" do
+    let(:name) { "C" }
+    it "should not overwrite them" do
+      conf = <<-CONFIG
+        input {
+          file {
+            type => "blah"
+            path => "#{path_path}"
+            start_position => "beginning"
+            sincedb_path => "#{sincedb_path}"
+            delimiter => "#{TEST_FILE_DELIMITER}"
+            codec => "json"
+          }
         }
-      }
-    CONFIG
+      CONFIG

-    File.open(tmpfile_path, "w") do |fd|
-      fd.puts('{"path": "my_path", "host": "my_host"}')
-      fd.puts('{"my_field": "my_val"}')
-      fd.fsync
-    end
+      File.open(tmpfile_path, "w") do |fd|
+        fd.puts('{"path": "my_path", "host": "my_host"}')
+        fd.puts('{"my_field": "my_val"}')
+        fd.fsync
+      end

-    events = input(conf) do |pipeline, queue|
-      2.times.collect { queue.pop }
-    end
+      events = input(conf) do |pipeline, queue|
+        2.times.collect { queue.pop }
+      end

-    existing_path_index, added_path_index = "my_val" == events[0].get("my_field") ? [1,0] : [0,1]
+      existing_path_index, added_path_index = "my_val" == events[0].get("my_field") ? [1,0] : [0,1]

-    expect(events[existing_path_index].get("path")).to eq "my_path"
-    expect(events[existing_path_index].get("host")).to eq "my_host"
-    expect(events[existing_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+      expect(events[existing_path_index].get("path")).to eq "my_path"
+      expect(events[existing_path_index].get("host")).to eq "my_host"
+      expect(events[existing_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"

-    expect(events[added_path_index].get("path")).to eq "#{tmpfile_path}"
-    expect(events[added_path_index].get("host")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
-    expect(events[added_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+      expect(events[added_path_index].get("path")).to eq "#{tmpfile_path}"
+      expect(events[added_path_index].get("host")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+      expect(events[added_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+    end
   end

-  it "should read old files" do
-    tmpfile_path = Stud::Temporary.pathname
-
-    conf = <<-CONFIG
-      input {
-        file {
-          type => "blah"
-          path => "#{tmpfile_path}"
-          start_position => "beginning"
-          codec => "json"
+  context "running the input twice" do
+    let(:name) { "D" }
+    it "should read old files" do
+      conf = <<-CONFIG
+        input {
+          file {
+            type => "blah"
+            path => "#{path_path}"
+            start_position => "beginning"
+            codec => "json"
+          }
         }
-      }
-    CONFIG
+      CONFIG

-    File.open(tmpfile_path, "w") do |fd|
-      fd.puts('{"path": "my_path", "host": "my_host"}')
-      fd.puts('{"my_field": "my_val"}')
-      fd.fsync
-    end
-    # arbitrary old file (2 days)
-    FileInput.make_file_older(tmpfile_path, 48 * 60 * 60)
+      File.open(tmpfile_path, "w") do |fd|
+        fd.puts('{"path": "my_path", "host": "my_host"}')
+        fd.puts('{"my_field": "my_val"}')
+        fd.fsync
+      end
+      # arbitrary old file (2 days)
+      FileInput.make_file_older(tmpfile_path, 48 * 60 * 60)

-    events = input(conf) do |pipeline, queue|
-      2.times.collect { queue.pop }
+      events = input(conf) do |pipeline, queue|
+        2.times.collect { queue.pop }
+      end
+      existing_path_index, added_path_index = "my_val" == events[0].get("my_field") ? [1,0] : [0,1]
+      expect(events[existing_path_index].get("path")).to eq "my_path"
+      expect(events[existing_path_index].get("host")).to eq "my_host"
+      expect(events[existing_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+
+      expect(events[added_path_index].get("path")).to eq "#{tmpfile_path}"
+      expect(events[added_path_index].get("host")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+      expect(events[added_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
     end
-    existing_path_index, added_path_index = "my_val" == events[0].get("my_field") ? [1,0] : [0,1]
-    expect(events[existing_path_index].get("path")).to eq "my_path"
-    expect(events[existing_path_index].get("host")).to eq "my_host"
-    expect(events[existing_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
-
-    expect(events[added_path_index].get("path")).to eq "#{tmpfile_path}"
-    expect(events[added_path_index].get("host")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
-    expect(events[added_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
   end

-  context "when sincedb_path is an existing directory" do
-    let(:tmpfile_path) { Stud::Temporary.pathname }
-    let(:sincedb_path) { Stud::Temporary.directory }
-    subject { LogStash::Inputs::File.new("path" => tmpfile_path, "sincedb_path" => sincedb_path) }
+  context "when sincedb_path is a directory" do
+    let(:name) { "E" }
+    subject { LogStash::Inputs::File.new("path" => path_path, "sincedb_path" => directory) }

     after :each do
       FileUtils.rm_rf(sincedb_path)
@@ -180,16 +187,19 @@
   end

   describe "testing with new, register, run and stop" do
+    let(:suffix) { "A" }
     let(:conf) { Hash.new }
     let(:mlconf) { Hash.new }
     let(:events) { Array.new }
     let(:mlcodec) { LogStash::Codecs::Multiline.new(mlconf) }
-    let(:codec) { FileInput::CodecTracer.new }
-    let(:tmpfile_path) { Stud::Temporary.pathname }
-    let(:sincedb_path) { Stud::Temporary.pathname }
+    let(:tracer_codec) { FileInput::CodecTracer.new }
     let(:tmpdir_path) { Stud::Temporary.directory }
+    let(:tmpfile_path) { ::File.join(tmpdir_path, "#{suffix}.txt") }
+    let(:path_path) { ::File.join(tmpdir_path, "*.txt") }
+    let(:sincedb_path) { ::File.join(tmpdir_path, "sincedb-#{suffix}") }

     after :each do
+      sleep(0.1) until subject.completely_stopped?
       FileUtils.rm_rf(sincedb_path)
     end

@@ -204,7 +214,7 @@
     end

       mlconf.update("pattern" => "^\s", "what" => "previous")
       conf.update("type" => "blah",
-                  "path" => tmpfile_path,
+                  "path" => path_path,
                   "sincedb_path" => sincedb_path,
                   "stat_interval" => 0.1,
                   "codec" => mlcodec,
@@ -213,16 +223,22 @@

     it "reads the appended data only" do
       subject.register
-      RSpec::Sequencing
-        .run_after(0.2, "assert zero events then append two lines") do
-          expect(events.size).to eq(0)
+      actions = RSpec::Sequencing
+        .run_after(1, "append two lines after delay") do
           File.open(tmpfile_path, "a") { |fd| fd.puts("hello"); fd.puts("world") }
         end
-        .then_after(0.4, "quit") do
+        .then("wait for one event") do
+          wait(0.75).for{events.size}.to eq(1)
+        end
+        .then("quit") do
           subject.stop
         end
+        .then("wait for flushed event") do
+          wait(0.75).for{events.size}.to eq(2)
+        end

       subject.run(events)
+      actions.assert_no_errors

       event1 = events[0]
       expect(event1).not_to be_nil
@@ -240,218 +256,172 @@

     context "when close_older config is specified" do
       let(:line) { "line1.1-of-a" }
-
+      let(:suffix) { "X" }
       subject { described_class.new(conf) }

       before do
         conf.update(
                     "type" => "blah",
-                    "path" => "#{tmpdir_path}/*.log",
+                    "path" => path_path,
                     "sincedb_path" => sincedb_path,
                     "stat_interval" => 0.02,
-                    "codec" => codec,
-                    "close_older" => 0.5,
+                    "codec" => tracer_codec,
+                    "close_older" => "100 ms",
+                    "start_position" => "beginning",
                     "delimiter" => TEST_FILE_DELIMITER)

         subject.register
       end

-      it "having timed_out, the identity is evicted" do
-        RSpec::Sequencing
+      it "having timed_out, the codec is auto flushed" do
+        actions = RSpec::Sequencing
           .run("create file") do
-            File.open("#{tmpdir_path}/a.log", "wb") { |file| file.puts(line) }
+            File.open(tmpfile_path, "wb") { |file| file.puts(line) }
           end
-          .then_after(0.3, "identity is mapped") do
-            expect(codec.trace_for(:accept)).to eq([true])
-            expect(subject.codec.identity_count).to eq(1)
+          .then_after(0.1, "identity is mapped") do
+            wait(0.75).for{subject.codec.identity_map[tmpfile_path]}.not_to be_nil, "identity is not mapped"
           end
-          .then_after(0.3, "test for auto_flush") do
-            expect(codec.trace_for(:auto_flush)).to eq([true])
-            expect(subject.codec.identity_count).to eq(0)
+          .then("wait for auto_flush") do
+            wait(0.75).for{subject.codec.identity_map[tmpfile_path].codec.trace_for(:auto_flush)}.to eq([true]), "autoflush didn't"
           end
-          .then_after(0.1, "quit") do
+          .then("quit") do
            subject.stop
          end
        subject.run(events)
+        actions.assert_no_errors
+        expect(subject.codec.identity_map[tmpfile_path].codec.trace_for(:accept)).to eq([true])
      end
    end

     context "when ignore_older config is specified" do
-      let(:line) { "line1.1-of-a" }
-      let(:tmp_dir_file) { "#{tmpdir_path}/a.log" }
-
-      subject { described_class.new(conf) }
-
+      let(:suffix) { "Y" }
       before do
-        File.open(tmp_dir_file, "a") do |fd|
-          fd.puts(line)
-          fd.fsync
-        end
-        FileInput.make_file_older(tmp_dir_file, 2)
         conf.update(
                     "type" => "blah",
-                    "path" => "#{tmpdir_path}/*.log",
+                    "path" => path_path,
                     "sincedb_path" => sincedb_path,
                     "stat_interval" => 0.02,
-                    "codec" => codec,
-                    "ignore_older" => 1,
+                    "codec" => tracer_codec,
+                    "ignore_older" => "500 ms",
                     "delimiter" => TEST_FILE_DELIMITER)
-
-        subject.register
-        Thread.new { subject.run(events) }
       end
+      subject { described_class.new(conf) }
+      let(:line) { "line1.1-of-a" }

       it "the file is not read" do
-        sleep 0.1
-        subject.stop
-        expect(codec).to receive_call_and_args(:accept, false)
-        expect(codec).to receive_call_and_args(:auto_flush, false)
-        expect(subject.codec.identity_count).to eq(0)
+        subject.register
+        RSpec::Sequencing
+          .run("create file") do
+            File.open(tmpfile_path, "a") do |fd|
+              fd.puts(line)
+              fd.fsync
+            end
+            FileInput.make_file_older(tmpfile_path, 2)
+          end
+          .then_after(0.5, "stop") do
+            subject.stop
+          end
+        subject.run(events)
+        expect(subject.codec.identity_map[tmpfile_path].codec.trace_for(:accept)).to be_falsey
       end
     end

     context "when wildcard path and a multiline codec is specified" do
       subject { described_class.new(conf) }
-
+      let(:suffix) { "J" }
+      let(:tmpfile_path2) { ::File.join(tmpdir_path, "K.txt") }
       before do
         mlconf.update("pattern" => "^\s", "what" => "previous")
         conf.update(
                     "type" => "blah",
-                    "path" => "#{tmpdir_path}/*.log",
+                    "path" => path_path,
+                    "start_position" => "beginning",
                     "sincedb_path" => sincedb_path,
                     "stat_interval" => 0.05,
                     "codec" => mlcodec,
+                    "file_sort_by" => "path",
                     "delimiter" => TEST_FILE_DELIMITER)

         subject.register
       end

       it "collects separate multiple line events from each file" do
+        subject
         actions = RSpec::Sequencing
           .run_after(0.1, "create files") do
-            File.open("#{tmpdir_path}/A.log", "wb") do |fd|
-              fd.puts("line1.1-of-a")
-              fd.puts("  line1.2-of-a")
-              fd.puts("  line1.3-of-a")
+            File.open(tmpfile_path, "wb") do |fd|
+              fd.puts("line1.1-of-J")
+              fd.puts("  line1.2-of-J")
+              fd.puts("  line1.3-of-J")
             end
-            File.open("#{tmpdir_path}/z.log", "wb") do |fd|
-              fd.puts("line1.1-of-z")
-              fd.puts("  line1.2-of-z")
-              fd.puts("  line1.3-of-z")
+            File.open(tmpfile_path2, "wb") do |fd|
+              fd.puts("line1.1-of-K")
+              fd.puts("  line1.2-of-K")
+              fd.puts("  line1.3-of-K")
             end
           end
-          .then_after(0.2, "assert both files are mapped as identities and stop") do
-            expect(subject.codec.identity_count).to eq(2)
+          .then("assert both files are mapped as identities and stop") do
+            wait(2).for {subject.codec.identity_count}.to eq(2), "both files are not mapped as identities"
           end
-          .then_after(0.1, "stop") do
+          .then("stop") do
             subject.stop
           end
-          .then_after(0.2 , "stop flushes both events") do
-            expect(events.size).to eq(2)
-            e1, e2 = events
-            e1_message = e1.get("message")
-            e2_message = e2.get("message")
-
-            # can't assume File A will be read first
-            if e1_message.start_with?('line1.1-of-z')
-              expect(e1.get("path")).to match(/z.log/)
-              expect(e2.get("path")).to match(/A.log/)
-              expect(e1_message).to eq("line1.1-of-z#{TEST_FILE_DELIMITER}  line1.2-of-z#{TEST_FILE_DELIMITER}  line1.3-of-z")
-              expect(e2_message).to eq("line1.1-of-a#{TEST_FILE_DELIMITER}  line1.2-of-a#{TEST_FILE_DELIMITER}  line1.3-of-a")
-            else
-              expect(e1.get("path")).to match(/A.log/)
-              expect(e2.get("path")).to match(/z.log/)
-              expect(e1_message).to eq("line1.1-of-a#{TEST_FILE_DELIMITER}  line1.2-of-a#{TEST_FILE_DELIMITER}  line1.3-of-a")
-              expect(e2_message).to eq("line1.1-of-z#{TEST_FILE_DELIMITER}  line1.2-of-z#{TEST_FILE_DELIMITER}  line1.3-of-z")
-            end
-          end
         subject.run(events)
         # wait for actions to complete
-        actions.value
+        actions.assert_no_errors
+        expect(events.size).to eq(2)
+        e1, e2 = events
+        e1_message = e1.get("message")
+        e2_message = e2.get("message")
+
+        expect(e1.get("path")).to match(/J.txt/)
+        expect(e2.get("path")).to match(/K.txt/)
+        expect(e1_message).to eq("line1.1-of-J#{TEST_FILE_DELIMITER}  line1.2-of-J#{TEST_FILE_DELIMITER}  line1.3-of-J")
+        expect(e2_message).to eq("line1.1-of-K#{TEST_FILE_DELIMITER}  line1.2-of-K#{TEST_FILE_DELIMITER}  line1.3-of-K")
       end

       context "if auto_flush is enabled on the multiline codec" do
         let(:mlconf) { { "auto_flush_interval" => 0.5 } }
-
+        let(:suffix) { "M" }
         it "an event is generated via auto_flush" do
           actions = RSpec::Sequencing
             .run_after(0.1, "create files") do
-              File.open("#{tmpdir_path}/A.log", "wb") do |fd|
+              File.open(tmpfile_path, "wb") do |fd|
                 fd.puts("line1.1-of-a")
                 fd.puts("  line1.2-of-a")
                 fd.puts("  line1.3-of-a")
               end
             end
-            .then_after(0.75, "wait for auto_flush") do
-              e1 = events.first
-              e1_message = e1.get("message")
-              expect(e1["path"]).to match(/a.log/)
-              expect(e1_message).to eq("line1.1-of-a#{TEST_FILE_DELIMITER}  line1.2-of-a#{TEST_FILE_DELIMITER}  line1.3-of-a")
+            .then("wait for auto_flush") do
+              wait(2).for{events.size}.to eq(1), "events size is not 1"
             end
             .then("stop") do
               subject.stop
             end
           subject.run(events)
           # wait for actions to complete
-          actions.value
+          actions.assert_no_errors
+          e1 = events.first
+          e1_message = e1.get("message")
+          expect(e1_message).to eq("line1.1-of-a#{TEST_FILE_DELIMITER}  line1.2-of-a#{TEST_FILE_DELIMITER}  line1.3-of-a")
+          expect(e1.get("path")).to match(/M.txt$/)
         end
       end
     end

-    context "when #run is called multiple times", :unix => true do
-      let(:file_path) { "#{tmpdir_path}/a.log" }
-      let(:buffer) { [] }
-      let(:run_thread_proc) do
-        lambda { Thread.new { subject.run(buffer) } }
-      end
-      let(:lsof_proc) do
-        lambda { `lsof -p #{Process.pid} | grep #{file_path}` }
-      end
-
-      subject { described_class.new(conf) }
-
-      before do
-        conf.update(
-          "path" => tmpdir_path + "/*.log",
-          "start_position" => "beginning",
-          "stat_interval" => 0.1,
-          "sincedb_path" => sincedb_path)
-
-        File.open(file_path, "w") do |fd|
-          fd.puts('foo')
-          fd.puts('bar')
-          fd.fsync
-        end
-      end
-
-      it "should only actually open files when content changes are detected" do
-        subject.register
-        expect(lsof_proc.call).to eq("")
-        # first run processes the file and records sincedb progress
-        run_thread_proc.call
-        wait(1).for{lsof_proc.call.scan(file_path).size}.to eq(1)
-        # second run quits the first run
-        # sees the file has not changed size and does not open it
-        run_thread_proc.call
-        wait(1).for{lsof_proc.call.scan(file_path).size}.to eq(0)
-        # truncate and write less than before
-        File.open(file_path, "w"){ |fd| fd.puts('baz'); fd.fsync }
-        # sees the file has changed size and does open it
-        wait(1).for{lsof_proc.call.scan(file_path).size}.to eq(1)
-      end
-    end
-
     describe "specifying max_open_files" do
+      let(:suffix) { "P" }
+      let(:tmpfile_path2) { ::File.join(tmpdir_path, "Q.txt") }
       subject { described_class.new(conf) }

       before do
-        File.open("#{tmpdir_path}/a.log", "w") do |fd|
-          fd.puts("line1-of-a")
-          fd.puts("line2-of-a")
+        File.open(tmpfile_path, "w") do |fd|
+          fd.puts("line1-of-P")
+          fd.puts("line2-of-P")
           fd.fsync
         end
-        File.open("#{tmpdir_path}/z.log", "w") do |fd|
-          fd.puts("line1-of-z")
-          fd.puts("line2-of-z")
+        File.open(tmpfile_path2, "w") do |fd|
+          fd.puts("line1-of-Q")
+          fd.puts("line2-of-Q")
           fd.fsync
         end
       end
@@ -461,37 +431,34 @@
         conf.clear
         conf.update(
                     "type" => "blah",
-                    "path" => "#{tmpdir_path}/*.log",
+                    "path" => path_path,
                     "sincedb_path" => sincedb_path,
                     "stat_interval" => 0.1,
                     "max_open_files" => 1,
                     "start_position" => "beginning",
+                    "file_sort_by" => "path",
                     "delimiter" => TEST_FILE_DELIMITER)
         subject.register
       end

       it "collects line events from only one file" do
         actions = RSpec::Sequencing
-          .run_after(0.2, "assert one identity is mapped") do
-            expect(subject.codec.identity_count).to eq(1)
+          .run("assert one identity is mapped") do
+            wait(0.4).for{subject.codec.identity_count}.to be > 0, "no identity is mapped"
           end
-          .then_after(0.1, "stop") do
+          .then("stop") do
             subject.stop
           end
-          .then_after(0.1, "stop flushes last event") do
-            expect(events.size).to eq(2)
-            e1, e2 = events
-            if Dir.glob("#{tmpdir_path}/*.log").first =~ %r{a\.log}
-              #linux and OSX have different retrieval order
-              expect(e1.get("message")).to eq("line1-of-a")
-              expect(e2.get("message")).to eq("line2-of-a")
-            else
-              expect(e1.get("message")).to eq("line1-of-z")
-              expect(e2.get("message")).to eq("line2-of-z")
-            end
+          .then("stop flushes last event") do
+            wait(0.4).for{events.size}.to eq(4), "events size does not equal 4"
           end
         subject.run(events)
         # wait for actions future value
-        actions.value
+        actions.assert_no_errors
+        e1, e2, e3, e4 = events
+        expect(e1.get("message")).to eq("line1-of-P")
+        expect(e2.get("message")).to eq("line2-of-P")
+        expect(e3.get("message")).to eq("line1-of-Q")
+        expect(e4.get("message")).to eq("line2-of-Q")
       end
     end

@@ -499,41 +466,36 @@
       before do
         conf.update(
                     "type" => "blah",
-                    "path" => "#{tmpdir_path}/*.log",
+                    "path" => path_path,
                     "sincedb_path" => sincedb_path,
                     "stat_interval" => 0.1,
                     "max_open_files" => 1,
                     "close_older" => 0.5,
                     "start_position" => "beginning",
+                    "file_sort_by" => "path",
                     "delimiter" => TEST_FILE_DELIMITER)
         subject.register
       end

       it "collects line events from both files" do
         actions = RSpec::Sequencing
-          .run_after(0.2, "assert both identities are mapped and the first two events are built") do
-            expect(subject.codec.identity_count).to eq(2)
-            expect(events.size).to eq(2)
+          .run("assert both identities are mapped and the first two events are built") do
+            wait(0.2).for{subject.codec.identity_count == 2 && events.size > 1}.to eq(true), "both identities are not mapped and the first two events are not built"
           end
-          .then_after(0.8, "wait for close to flush last event of each identity") do
-            expect(events.size).to eq(4)
-            if Dir.glob("#{tmpdir_path}/*.log").first =~ %r{a\.log}
-              #linux and OSX have different retrieval order
-              e1, e2, e3, e4 = events
-            else
-              e3, e4, e1, e2 = events
-            end
-            expect(e1.get("message")).to eq("line1-of-a")
-            expect(e2.get("message")).to eq("line2-of-a")
-            expect(e3.get("message")).to eq("line1-of-z")
-            expect(e4.get("message")).to eq("line2-of-z")
+          .then("wait for close to flush last event of each identity") do
+            wait(0.8).for{events.size}.to eq(4), "close does not flush last event of each identity"
           end
           .then_after(0.1, "stop") do
             subject.stop
           end
         subject.run(events)
         # wait for actions future value
-        actions.value
+        actions.assert_no_errors
+        e1, e2, e3, e4 = events
+        expect(e1.get("message")).to eq("line1-of-P")
+        expect(e2.get("message")).to eq("line2-of-P")
+        expect(e3.get("message")).to eq("line1-of-Q")
+        expect(e4.get("message")).to eq("line2-of-Q")
       end
     end
   end
 end
diff --git a/src/main/java/jnr/posix/windows/WindowsFileInformationByHandle.java b/src/main/java/jnr/posix/windows/WindowsFileInformationByHandle.java
new file mode 100644
index 0000000..7e36502
--- /dev/null
+++ b/src/main/java/jnr/posix/windows/WindowsFileInformationByHandle.java
@@ -0,0 +1,24 @@
+package jnr.posix.windows;
+/*
+This, sadly, can't be used.
+See the JrubyFileWatchLibrary class.
+The jnr jar is loaded by a different class loader than our jar (in rspec anyway).
+Even though the package is the same, Java restricts access to `dwVolumeSerialNumber` in the super class.
+We have to continue to use FFI in Ruby.
+*/
+
+public class WindowsFileInformationByHandle extends WindowsByHandleFileInformation {
+    public WindowsFileInformationByHandle(jnr.ffi.Runtime runtime) {
+        super(runtime);
+    }
+
+    public java.lang.String getIdentifier() {
+        StringBuilder builder = new StringBuilder();
+        builder.append(dwVolumeSerialNumber.intValue());
+        builder.append("-");
+        builder.append(nFileIndexHigh.intValue());
+        builder.append("-");
+        builder.append(nFileIndexLow.intValue());
+        return builder.toString();
+    }
+}
diff --git a/src/main/java/org/logstash/filewatch/JrubyFileWatchLibrary.java b/src/main/java/org/logstash/filewatch/JrubyFileWatchLibrary.java
index 7a859a1..e37d186 100644
--- a/src/main/java/org/logstash/filewatch/JrubyFileWatchLibrary.java
+++ b/src/main/java/org/logstash/filewatch/JrubyFileWatchLibrary.java
@@ -12,28 +12,40 @@
  * fnv code extracted and modified from https://github.com/jakedouglas/fnv-java
  */

+import jnr.ffi.Runtime;
+import jnr.posix.HANDLE;
+import jnr.posix.JavaLibCHelper;
+import jnr.posix.POSIX;
+import jnr.posix.WindowsLibC;
+import jnr.posix.WindowsPOSIX;
+import jnr.posix.util.WindowsHelpers;
+import jnr.posix.windows.WindowsFileInformationByHandle;
 import org.jruby.Ruby;
 import org.jruby.RubyBignum;
 import org.jruby.RubyClass;
 import org.jruby.RubyFixnum;
 import org.jruby.RubyIO;
-import org.jruby.RubyInteger;
 import org.jruby.RubyModule;
+import org.jruby.RubyNumeric;
 import org.jruby.RubyObject;
 import org.jruby.RubyString;
 import org.jruby.anno.JRubyClass;
 import org.jruby.anno.JRubyMethod;
+import org.jruby.ext.ffi.Factory;
+import org.jruby.ext.ffi.MemoryIO;
+import org.jruby.ext.ffi.Pointer;
 import org.jruby.runtime.Arity;
+import org.jruby.runtime.Block;
 import org.jruby.runtime.ThreadContext;
 import org.jruby.runtime.builtin.IRubyObject;
 import org.jruby.runtime.load.Library;
+import org.jruby.util.io.OpenFile;

 import java.io.IOException;
 import java.math.BigInteger;
 import java.nio.channels.Channel;
 import java.nio.channels.FileChannel;
 import java.nio.file.FileSystems;
-import java.nio.file.OpenOption;
 import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;

@@ -47,6 +59,21 @@ public class JrubyFileWatchLibrary implements Library {
     private static final BigInteger MOD32 = new BigInteger("2").pow(32);
     private static final BigInteger MOD64 = new BigInteger("2").pow(64);

+    // private static final int GENERIC_ALL = 268435456;
+    private static final int GENERIC_READ = -2147483648;
+    // private static final int GENERIC_WRITE = 1073741824;
+    // private static final int GENERIC_EXECUTE = 33554432;
+    // private static final int FILE_SHARE_DELETE = 4;
+    private static final int FILE_SHARE_READ = 1;
+    private static final int FILE_SHARE_WRITE = 2;
+    // private static final int CREATE_ALWAYS = 2;
+    // private static final int CREATE_NEW = 1;
+    // private static final int OPEN_ALWAYS = 4;
+    private static final int OPEN_EXISTING = 3;
+    // private static final int TRUNCATE_EXISTING = 5;
+    private static final int FILE_FLAG_BACKUP_SEMANTICS = 33554432;
+    // private static final int FILE_ATTRIBUTE_READONLY = 1;
+
     @Override
     public final void load(final Ruby runtime, final boolean wrap) {
         final RubyModule module = runtime.defineModule("FileWatch");
@@ -59,30 +86,119 @@ public final void load(final Ruby runtime, final boolean wrap) {

     }

-    @JRubyClass(name = "FileExt", parent = "Object")
+    @JRubyClass(name = "FileExt")
     public static class RubyFileExt extends RubyObject {

-        public RubyFileExt(final Ruby runtime, final RubyClass metaClass) {
-            super(runtime, metaClass);
+        public RubyFileExt(final Ruby runtime, final RubyClass meta) {
+            super(runtime, meta);
         }

-        public RubyFileExt(final RubyClass metaClass) {
-            super(metaClass);
+        public RubyFileExt(final RubyClass meta) {
+            super(meta);
         }

         @JRubyMethod(name = "open", required = 1, meta = true)
-        public static RubyIO open(final ThreadContext context, final IRubyObject self, final RubyString path) throws IOException {
+        public static IRubyObject open(final ThreadContext context, final IRubyObject self, final RubyString path) throws IOException {
             final Path javapath = FileSystems.getDefault().getPath(path.asJavaString());
-            final OpenOption[] options = new OpenOption[1];
-            options[0] = StandardOpenOption.READ;
-            final Channel channel = FileChannel.open(javapath, options);
-            return new RubyIO(Ruby.getGlobalRuntime(), channel);
+            final Channel channel = FileChannel.open(javapath, StandardOpenOption.READ);
+            final RubyIO irubyobject = new RubyWinIO(context.runtime, channel);
+            return irubyobject;
+        }
+
+        @JRubyMethod(name = "io_handle", required = 1, meta = true)
+        public static IRubyObject ioHandle(final ThreadContext context, final IRubyObject self, final IRubyObject object, Block block) {
+            final Ruby runtime = context.runtime;
+            if (!block.isGiven()) {
+                throw runtime.newArgumentError(0, 1);
+            }
+            if (object instanceof RubyWinIO) {
+                final RubyWinIO rubyWinIO = (RubyWinIO) object;
+                final OpenFile fptr = rubyWinIO.getOpenFileChecked();
+                final boolean locked = fptr.lock();
+                try {
+                    fptr.checkClosed();
+                    if (rubyWinIO.isDirect()) {
+                        final MemoryIO memoryio = Factory.getInstance().wrapDirectMemory(runtime, rubyWinIO.getAddress());
+                        final Pointer pointer = new Pointer(runtime, memoryio);
+                        return block.yield(context, pointer);
+                    }
+                } finally {
+                    if (locked) {
+                        fptr.unlock();
+                    }
+                }
+            } else {
+                System.out.println("Required argument is not a WinIO instance");
+            }
+            return runtime.newString();
+        }
+
+        //@JRubyMethod(name = "io_inode", required = 1, meta = true)
+        public static RubyString ioInode(final ThreadContext context, final IRubyObject self, final IRubyObject object) {
+            final Ruby runtime = context.runtime;
+            if (!(object instanceof RubyIO)) {
+                System.out.println("Required argument is not an IO instance");
+                return runtime.newString();
+            }
+            final RubyIO rubyIO = (RubyIO) object;
+            final OpenFile fptr = rubyIO.getOpenFileChecked();
+            final boolean locked = fptr.lock();
+            String inode = "";
+            try {
+                fptr.checkClosed();
+                final POSIX posix = runtime.getPosix();
+                final int realFileno = fptr.fd().realFileno;
+                if (posix.isNative() && posix instanceof WindowsPOSIX && realFileno != -1) {
+                    final WindowsPOSIX winposix = (WindowsPOSIX) posix;
+                    final WindowsLibC wlibc = (WindowsLibC) winposix.libc();
+                    final WindowsFileInformationByHandle info = new WindowsFileInformationByHandle(Runtime.getRuntime(runtime.getPosix().libc()));
+                    final HANDLE handle = JavaLibCHelper.gethandle(JavaLibCHelper.getDescriptorFromChannel(fptr.fd().chFile));
+                    if (handle.isValid()) {
+                        if (wlibc.GetFileInformationByHandle(handle, info) > 0) {
+                            inode = info.getIdentifier();
+                        } else {
+                            System.out.println("Could not 'GetFileInformationByHandle' from handle");
+                        }
+                    } else {
+                        System.out.println("Could not derive 'HANDLE' from Ruby IO instance via io.getOpenFileChecked().fd().chFile");
+                    }
+                }
+            } finally {
+                if (locked) {
+                    fptr.unlock();
+                }
+            }
+            return runtime.newString(inode);
+        }
+
+        //@JRubyMethod(name = "path_inode", required = 1, meta = true)
+        public static RubyString pathInode(final ThreadContext context, final IRubyObject self, final RubyString path) {
+            final Ruby runtime = context.runtime;
+            final POSIX posix = runtime.getPosix();
+            String inode = "";
+            if (posix.isNative() && posix.libc() instanceof WindowsLibC) {
+                final WindowsLibC wlibc = (WindowsLibC) posix.libc();
+                final byte[] wpath = WindowsHelpers.toWPath(path.toString());
+                final HANDLE handle = wlibc.CreateFileW(wpath, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, null, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0);
+                if (handle.isValid()) {
+                    final WindowsFileInformationByHandle info = new WindowsFileInformationByHandle(Runtime.getRuntime(runtime.getPosix().libc()));
+                    if (wlibc.GetFileInformationByHandle(handle, info) > 0) {
+                        inode = info.getIdentifier();
+                    } else {
+                        System.out.println("Could not 'GetFileInformationByHandle' from handle");
+                    }
+                    wlibc.CloseHandle(handle);
+                } else {
+                    System.out.printf("Could not open file via 'CreateFileW' on path: %s", path.toString());
+                }
+            }
+            return runtime.newString(inode);
         }
     }

     // This class may be used by fingerprinting in the future
     @SuppressWarnings({"NewMethodNamingConvention", "ChainOfInstanceofChecks"})
-    @JRubyClass(name = "Fnv", parent = "Object")
+    @JRubyClass(name = "Fnv")
     public static class Fnv extends RubyObject {

         private byte[] bytes;
@@ -98,12 +214,12 @@ public Fnv(final RubyClass metaClass) {
         }

         @JRubyMethod(name = "coerce_bignum", meta = true, required = 1)
-        public static IRubyObject coerceBignum(final ThreadContext ctx, final IRubyObject recv, final IRubyObject i) {
-            if (i instanceof RubyBignum) {
-                return i;
+        public static IRubyObject coerceBignum(final ThreadContext ctx, final IRubyObject recv, final IRubyObject rubyObject) {
+            if (rubyObject instanceof RubyBignum) {
+                return rubyObject;
             }
-            if (i instanceof RubyFixnum) {
-                return RubyBignum.newBignum(ctx.runtime, ((RubyFixnum)i).getBigIntegerValue());
+            if (rubyObject instanceof RubyFixnum) {
+                return RubyBignum.newBignum(ctx.runtime, ((RubyNumeric) rubyObject).getBigIntegerValue());
             }
             throw ctx.runtime.newRaiseException(ctx.runtime.getClass("StandardError"), "Can't coerce");
         }
@@ -141,7 +257,7 @@ public IRubyObject closed_p(final ThreadContext ctx) {
         }

         @JRubyMethod(name = "fnv1a32", optional = 1)
-        public IRubyObject fnv1a_32(final ThreadContext ctx, IRubyObject[] args) {
+        public IRubyObject fnv1a_32(final ThreadContext ctx, final IRubyObject[] args) {
             IRubyObject[] args1 = args;
             if(open) {
                 args1 = Arity.scanArgs(ctx.runtime, args1, 0, 1);
@@ -151,7 +267,7 @@ public IRubyObject fnv1a_32(final ThreadContext ctx, IRubyObject[] args) {
         }

         @JRubyMethod(name = "fnv1a64", optional = 1)
-        public IRubyObject fnv1a_64(final ThreadContext ctx, IRubyObject[] args) {
+        public IRubyObject fnv1a_64(final ThreadContext ctx, final IRubyObject[] args) {
             IRubyObject[] args1 = args;
             if(open) {
                 args1 = Arity.scanArgs(ctx.runtime, args1, 0, 1);
@@ -161,13 +277,9 @@ public IRubyObject fnv1a_64(final ThreadContext ctx, IRubyObject[] args) {
         }

         private long convertLong(final IRubyObject obj) {
-            if(obj instanceof RubyInteger) {
-                return ((RubyInteger)obj).getLongValue();
+            if(obj instanceof RubyNumeric) {
+                return ((RubyNumeric) obj).getLongValue();
             }
-            if(obj instanceof RubyFixnum) {
-                return ((RubyFixnum)obj).getLongValue();
-            }
-
             return size;
         }
diff --git a/src/main/java/org/logstash/filewatch/RubyWinIO.java b/src/main/java/org/logstash/filewatch/RubyWinIO.java
new file mode 100644
index 0000000..c6cfbe4
--- /dev/null
+++ b/src/main/java/org/logstash/filewatch/RubyWinIO.java
@@ -0,0 +1,65 @@
+package org.logstash.filewatch;
+
+import jnr.posix.HANDLE;
+import jnr.posix.JavaLibCHelper;
+import org.jruby.Ruby;
+import org.jruby.RubyBoolean;
+import org.jruby.RubyIO;
+import org.jruby.anno.JRubyClass;
+import org.jruby.anno.JRubyMethod;
+import org.jruby.runtime.ThreadContext;
+import org.jruby.runtime.builtin.IRubyObject;
+import org.jruby.util.io.OpenFile;
+
+import java.nio.channels.Channel;
+
+@JRubyClass(name = "WinIO")
+public class RubyWinIO extends RubyIO {
+    private boolean valid;
+    private boolean direct;
+    private long address;
+
+    public RubyWinIO(Ruby runtime, Channel channel) {
+        super(runtime, channel);
+        final OpenFile fptr = getOpenFileChecked();
+        final boolean locked = fptr.lock();
+        try {
+            fptr.checkClosed();
+            final HANDLE handle = JavaLibCHelper.gethandle(JavaLibCHelper.getDescriptorFromChannel(fptr.fd().chFile));
+            if (handle.isValid()) {
+                direct = handle.toPointer().isDirect();
+                address = handle.toPointer().address();
+                valid = true;
+            } else {
+                direct = false;
+                address = 0L;
+                valid = false;
+            }
+        } finally {
+            if (locked) {
+                fptr.unlock();
+            }
+        }
+    }
+
+    @JRubyMethod(name = "valid?")
+    public RubyBoolean valid_p(ThreadContext context) {
+        return context.runtime.newBoolean(valid);
+    }
+
+    @Override
+    @JRubyMethod
+    public IRubyObject close() {
+        direct = false;
+        address = 0L;
+        return super.close();
+    }
+
+    final public boolean isDirect() {
+        return direct;
+    }
+
+    final public long getAddress() {
+        return address;
+    }
+}
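Taken together, the Java pieces above give Windows a stable file identifier (volumeSerialNumber-fileIndexHigh-fileIndexLow) in place of faked dev/inode values. A hypothetical Ruby caller, written only with names that appear in this patch (Winhelper.identifier_from_path, FileWatch::FileExt.open, WinIO#valid?, FileWatch::FileExt.io_handle); this is a sketch of the intended usage on Windows, not code shipped in the patch:

    require "filewatch/winhelper"

    path = "C:/logs/app.log"

    # FFI route, which the plugin keeps using per the comment in
    # WindowsFileInformationByHandle.java:
    identifier = Winhelper.identifier_from_path(path)
    # => e.g. "volumeSerial-fileIndexHigh-fileIndexLow", or "unknown" on failure
    raise "no identifier" unless identifier.count("-") == 2

    # JRuby extension route: FileExt.open returns a WinIO that captures the
    # raw HANDLE address at construction time.
    io = FileWatch::FileExt.open(path)
    begin
      if io.valid?
        FileWatch::FileExt.io_handle(io) do |pointer|
          # `pointer` wraps the Windows HANDLE address; it could be handed to
          # FFI calls such as GetFileInformationByHandle to build the same
          # identifier string.
        end
      end
    ensure
      io.close
    end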