diff --git a/.gitignore b/.gitignore index 3300a23..8e7167a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ Gemfile.lock .bundle vendor +lib/jars diff --git a/JAR_VERSION b/JAR_VERSION new file mode 100644 index 0000000..3eefcb9 --- /dev/null +++ b/JAR_VERSION @@ -0,0 +1 @@ +1.0.0 diff --git a/Rakefile b/Rakefile index 4f4b858..e1595e1 100644 --- a/Rakefile +++ b/Rakefile @@ -5,3 +5,9 @@ task :default do end require "logstash/devutils/rake" + +desc "Compile and put filewatch jar into lib/jars" +task :vendor do + exit(1) unless system './gradlew clean jar' + puts "-------------------> built filewatch jar via rake" +end diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..f2fd43f --- /dev/null +++ b/build.gradle @@ -0,0 +1,68 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +apply plugin: "java" +apply plugin: "distribution" +apply plugin: "idea" + +group = 'org.logstash.filewatch' +version file("JAR_VERSION").text.replaceAll("\\s","") + +repositories { + mavenCentral() +} + +project.sourceCompatibility = 1.8 + +dependencies { + compileOnly group: 'org.jruby', name: 'jruby-complete', version: "9.1.13.0" +} + +task sourcesJar(type: Jar, dependsOn: classes) { + from sourceSets.main.allSource + classifier 'sources' + extension 'jar' +} + +task javadocJar(type: Jar, dependsOn: javadoc) { + from javadoc.destinationDir + classifier 'javadoc' + extension 'jar' +} + +task copyGemjar(type: Copy, dependsOn: sourcesJar) { + from project.jar + into project.file('lib/jars/') +} + +task cleanGemjar { + delete fileTree(project.file('lib/jars/')) { + include '*.jar' + } +} + +clean.dependsOn(cleanGemjar) +jar.finalizedBy(copyGemjar) + + +// See http://www.gradle.org/docs/current/userguide/gradle_wrapper.html +task wrapper(type: Wrapper) { + description = 'Install Gradle wrapper' + gradleVersion = '4.5.1' +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..c44b679 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..568c50b --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-4.5.1-bin.zip diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..cccdd3d --- /dev/null +++ b/gradlew @@ -0,0 +1,172 @@ +#!/usr/bin/env sh + +############################################################################## +## +## Gradle start up script for UN*X +## 
+############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? 
-ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..e95643d --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,84 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. 
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/lib/filewatch/bootstrap.rb b/lib/filewatch/bootstrap.rb new file mode 100644 index 0000000..5eac4fa --- /dev/null +++ b/lib/filewatch/bootstrap.rb @@ -0,0 +1,74 @@ +# encoding: utf-8 +require "rbconfig" +require "pathname" +# require "logstash/environment" + +## Common setup +# all the required constants and files +# defined in one place +module FileWatch + # the number of bytes read from a file during the read phase + FILE_READ_SIZE = 32768 + # the largest fixnum in ruby + # this is used in the read loop e.g. + # @opts[:file_chunk_count].times do + # where file_chunk_count defaults to this constant + FIXNUM_MAX = (2**(0.size * 8 - 2) - 1) + + require_relative "helper" + + module WindowsInode + def prepare_inode(path, stat) + fileId = Winhelper.GetWindowsUniqueFileIdentifier(path) + [fileId, 0, 0] # dev_* doesn't make sense on Windows + end + end + + module UnixInode + def prepare_inode(path, stat) + [stat.ino.to_s, stat.dev_major, stat.dev_minor] + end + end + + jar_version = IO.read("JAR_VERSION").strip + + require "java" + require_relative "../../lib/jars/filewatch-#{jar_version}.jar" + require "jruby_file_watch" + + if LogStash::Environment.windows? + require "winhelper" + FileOpener = FileExt + InodeMixin = WindowsInode + else + FileOpener = ::File + InodeMixin = UnixInode + end + + # Structs can be used as hash keys because they compare by value + # this is used as the key for values in the sincedb hash + InodeStruct = Struct.new(:inode, :maj, :min) do + def to_s + to_a.join(" ") + end + end + + class NoSinceDBPathGiven < StandardError; end + + # how often (in seconds) we logger.warn a failed file open, per path. 
+  OPEN_WARN_INTERVAL = ENV.fetch("FILEWATCH_OPEN_WARN_INTERVAL", 300).to_i
+  MAX_FILES_WARN_INTERVAL = ENV.fetch("FILEWATCH_MAX_FILES_WARN_INTERVAL", 20).to_i
+
+  require "logstash/util/buftok"
+  require_relative "settings"
+  require_relative "sincedb_value"
+  require_relative "sincedb_record_serializer"
+  require_relative "watched_files_collection"
+  require_relative "sincedb_collection"
+  require_relative "watch"
+  require_relative "watched_file"
+  require_relative "discoverer"
+  require_relative "observing_base"
+  require_relative "observing_tail"
+  require_relative "observing_read"
+end
diff --git a/lib/filewatch/discoverer.rb b/lib/filewatch/discoverer.rb
new file mode 100644
index 0000000..b98f77d
--- /dev/null
+++ b/lib/filewatch/discoverer.rb
@@ -0,0 +1,94 @@
+# encoding: utf-8
+require "logstash/util/loggable"
+
+module FileWatch
+  class Discoverer
+    # given a path or glob, will prepare for and discover files to watch.
+    # if they are not excluded or ignorable
+    # they are added to the watched_files collection and
+    # associated with a sincedb entry if one can be found
+    include LogStash::Util::Loggable
+
+    def initialize(watched_files_collection, sincedb_collection, settings)
+      @watching = []
+      @exclude = []
+      @watched_files_collection = watched_files_collection
+      @sincedb_collection = sincedb_collection
+      @settings = settings
+      @settings.exclude.each { |p| @exclude << p }
+    end
+
+    def add_path(path)
+      return if @watching.member?(path)
+      @watching << path
+      discover_files(path)
+      self
+    end
+
+    def discover
+      @watching.each do |path|
+        discover_files(path)
+      end
+    end
+
+    private
+
+    def can_exclude?(watched_file, new_discovery)
+      @exclude.each do |pattern|
+        if watched_file.pathname.fnmatch?(pattern)
+          if new_discovery
+            logger.debug("Discoverer can_exclude?: #{watched_file.path}: skipping " +
+              "because it matches exclude #{pattern}")
+          end
+          watched_file.unwatch
+          return true
+        end
+      end
+      false
+    end
+
+    def discover_files(path)
+      globbed = Dir.glob(path)
+      globbed = [path] if globbed.empty?
+      logger.debug("Discoverer found files, count: #{globbed.size}")
+      globbed.each do |file|
+        logger.debug("Discoverer found file, path: #{file}")
+        pathname = Pathname.new(file)
+        next unless pathname.file?
+        next if pathname.symlink?
+        new_discovery = false
+        watched_file = @watched_files_collection.watched_file_by_path(file)
+        if watched_file.nil?
+          logger.debug("Discoverer discover_files: #{path}: new: #{file} (exclude is #{@exclude.inspect})")
+          new_discovery = true
+          watched_file = WatchedFile.new(pathname, pathname.stat, @settings)
+        end
+        # if it is already unwatched or it's excluded then we can skip
+        next if watched_file.unwatched? || can_exclude?(watched_file, new_discovery)
+
+        if new_discovery
+          if watched_file.file_ignorable?
+            logger.debug("Discoverer discover_files: #{file}: skipping because it was last modified more than #{@settings.ignore_older} seconds ago")
+            # on discovery ignorable watched_files are put into the ignored state and that
+            # updates the size from the internal stat
+            # so the existing contents are not read.
+            # because, normally, a newly discovered file will
+            # have a watched_file size of zero
+            # they are still added to the collection so we know they are there for the next periodic discovery
+            watched_file.ignore
+          end
+          # now add the discovered file to the watched_files collection and adjust the sincedb collections
+          @watched_files_collection.add(watched_file)
+          # initially when the sincedb collection is filled with records from the persistence file
+          # each value is not associated with a watched file
+          # a sincedb_value can be:
+          #   unassociated
+          #   associated with this watched_file
+          #   associated with a different watched_file
+          @sincedb_collection.associate(watched_file)
+        end
+        # at this point the watched file is created, is in the db but not yet opened or being processed
+      end
+    end
+  end
+end
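Note (reviewer sketch, not part of the diff): `Discoverer#can_exclude?` tests each exclude pattern against the full `Pathname` with `fnmatch?`, and Ruby's fnmatch lets `*` cross directory separators by default, so a bare `*.gz` pattern excludes gzipped files at any depth. The path below is hypothetical.

```ruby
require "pathname"

path = Pathname.new("/var/log/app/archive/app.1.log.gz") # hypothetical path

path.fnmatch?("*.gz")  # => true  -- would be excluded
path.fnmatch?("*.log") # => false -- suffix is .gz, so it stays watched
```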
diff --git a/lib/filewatch/helper.rb b/lib/filewatch/helper.rb
new file mode 100644
index 0000000..5da8c0f
--- /dev/null
+++ b/lib/filewatch/helper.rb
@@ -0,0 +1,65 @@
+# encoding: utf-8
+# code downloaded from Ruby on Rails 4.2.1
+# https://raw.githubusercontent.com/rails/rails/v4.2.1/activesupport/lib/active_support/core_ext/file/atomic.rb
+# change method name to avoid borking active_support and vice versa
+require 'fileutils'
+
+module FileHelper
+  extend self
+  # Write to a file atomically. Useful for situations where you don't
+  # want other processes or threads to see half-written files.
+  #
+  #   FileHelper.write_atomically('important.file') do |file|
+  #     file.write('hello')
+  #   end
+  def write_atomically(file_name)
+
+    if File.exist?(file_name)
+      # Get original file permissions
+      old_stat = File.stat(file_name)
+    else
+      # If not possible, probe which are the default permissions in the
+      # destination directory.
+      old_stat = probe_stat_in(File.dirname(file_name))
+    end
+
+    mode = old_stat ? old_stat.mode : nil
+
+    # Create temporary file with identical permissions
+    temp_file = File.new(rand_filename(file_name), "w", mode)
+    temp_file.binmode
+    return_val = yield temp_file
+    temp_file.close
+
+    # Overwrite original file with temp file
+    File.rename(temp_file.path, file_name)
+
+    # Unable to get permissions of the original file => return
+    return return_val if old_stat.nil?
+
+    # Set correct uid/gid on new file
+    File.chown(old_stat.uid, old_stat.gid, file_name) if old_stat
+
+    return_val
+  end
+
+  def device?(file_name)
+    File.chardev?(file_name) || File.blockdev?(file_name)
+  end
+
+  # Private utility method.
+  def probe_stat_in(dir) #:nodoc:
+    basename = rand_filename(".permissions_check")
+    file_name = File.join(dir, basename)
+    FileUtils.touch(file_name)
+    File.stat(file_name)
+  rescue
+    # ...
+  ensure
+    FileUtils.rm_f(file_name) if File.exist?(file_name)
+  end
+
+  def rand_filename(prefix)
+    [ prefix, Thread.current.object_id, Process.pid, rand(1000000) ].join('.')
+  end
+end
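Note (reviewer sketch, not part of the diff): the temp-file-plus-`File.rename` dance above means readers of the target path never observe a half-written file. A usage sketch, with a hypothetical path:

```ruby
require_relative "helper"

FileHelper.write_atomically("/tmp/example.sincedb") do |file|
  # written to a same-directory temp file, then renamed into place
  file.write("2977152 1 4 94\n")
end
```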
diff --git a/lib/filewatch/observing_base.rb b/lib/filewatch/observing_base.rb
new file mode 100644
index 0000000..387ba77
--- /dev/null
+++ b/lib/filewatch/observing_base.rb
@@ -0,0 +1,97 @@
+# encoding: utf-8
+
+## Interface API topology
+# ObservingBase module (this file)
+#   is a module mixin providing a common constructor and the external API for File Input Plugin interaction
+#   calls build_specific_processor on ObservingRead or ObservingTail
+# ObservingRead and ObservingTail
+#   provides the External API method subscribe(observer = NullObserver.new)
+#   build_specific_processor(settings) - provide a Tail or Read specific Processor,
+#     TailMode::Processor or ReadMode::Processor
+#   initialize_handlers(sincedb_collection, observer) - called when the observer subscribes to changes in a Mode,
+#     builds mode specific handler instances with references to the observer
+#   process_closed(watched_files) - provide specific processing of watched_files in the closed state
+#   process_ignored(watched_files) - provide specific processing of watched_files in the ignored state
+#   process_watched(watched_files) - provide specific processing of watched_files in the watched state
+#   process_active(watched_files) - provide specific processing of watched_files in the active state
+#   These methods can call "handler" methods that delegate to the specific Handler classes.
+# TailMode::Handlers module namespace
+#   contains the Handler classes that deal with Tail mode file lifecycle "events".
+#   The TailMode::Handlers::Base
+#     handle(watched_file) - this method calls handle_specifically defined in a subclass
+#     handle_specifically(watched_file) - this is a noop method
+#     update_existing_specifically(watched_file, sincedb_value) - this is a noop method
+#   Each handler extends the Base class to provide specific implementations of these two methods:
+#     handle_specifically(watched_file)
+#     update_existing_specifically(watched_file, sincedb_value)
+# ReadMode::Handlers module namespace
+#   contains the Handler classes that deal with Read mode file lifecycle "events".
+#   The ReadMode::Handlers::Base
+#     handle(watched_file) - this method calls handle_specifically defined in a subclass
+#     handle_specifically(watched_file) - this is a noop method
+#   Each handler extends the Base class to provide specific implementations of this method:
+#     handle_specifically(watched_file)
+
+module FileWatch
+  module ObservingBase
+    attr_reader :watch, :sincedb_collection, :settings
+
+    def initialize(opts={})
+      options = {
+        :sincedb_write_interval => 10,
+        :stat_interval => 1,
+        :discover_interval => 5,
+        :exclude => [],
+        :start_new_files_at => :end,
+        :delimiter => "\n",
+        :file_chunk_count => FIXNUM_MAX,
+        :file_sort_by => "last_modified",
+        :file_sort_direction => "asc",
+      }.merge(opts)
+      unless options.include?(:sincedb_path)
+        raise NoSinceDBPathGiven.new("No sincedb_path set in options. This should have been added in the main LogStash::Inputs::File class")
+      end
+      @settings = Settings.from_options(options)
+      build_watch_and_dependencies
+    end
+
+    def build_watch_and_dependencies
+      logger.info("START, creating Discoverer, Watch with file and sincedb collections")
+      watched_files_collection = WatchedFilesCollection.new(@settings)
+      @sincedb_collection = SincedbCollection.new(@settings)
+      @sincedb_collection.open
+      discoverer = Discoverer.new(watched_files_collection, @sincedb_collection, @settings)
+      @watch = Watch.new(discoverer, watched_files_collection, @settings)
+      @watch.add_processor build_specific_processor(@settings)
+    end
+
+    def watch_this(path)
+      @watch.watch(path)
+    end
+
+    def sincedb_write(reason=nil)
+      # can be invoked from the file input
+      @sincedb_collection.write(reason)
+    end
+
+    # quit is a sort-of finalizer,
+    # it should be called for clean up
+    # before the instance is disposed of.
+    def quit
+      logger.info("QUIT - closing all files and shutting down.")
+      @watch.quit # <-- should close all the files
+      # sincedb_write("shutting down")
+    end
+
+    # close_file(path) is to be used by external code
+    # when it knows that it is completely done with a file.
+    # Other files or folders may still be being watched.
+ # Caution, once unwatched, a file can't be watched again + # unless a new instance of this class begins watching again. + # The sysadmin should rename, move or delete the file. + def close_file(path) + @watch.unwatch(path) + sincedb_write + end + end +end diff --git a/lib/filewatch/observing_read.rb b/lib/filewatch/observing_read.rb new file mode 100644 index 0000000..272fee9 --- /dev/null +++ b/lib/filewatch/observing_read.rb @@ -0,0 +1,23 @@ +# encoding: utf-8 +require "logstash/util/loggable" +require_relative "read_mode/processor" + +module FileWatch + class ObservingRead + include LogStash::Util::Loggable + include ObservingBase + + def subscribe(observer) + # observer here is the file input + watch.subscribe(observer, sincedb_collection) + sincedb_collection.write("read mode subscribe complete - shutting down") + end + + private + + def build_specific_processor(settings) + ReadMode::Processor.new(settings) + end + + end +end diff --git a/lib/filewatch/observing_tail.rb b/lib/filewatch/observing_tail.rb new file mode 100644 index 0000000..e2f51b6 --- /dev/null +++ b/lib/filewatch/observing_tail.rb @@ -0,0 +1,22 @@ +# encoding: utf-8 +require "logstash/util/loggable" +require_relative 'tail_mode/processor' + +module FileWatch + class ObservingTail + include LogStash::Util::Loggable + include ObservingBase + + def subscribe(observer) + # observer here is the file input + watch.subscribe(observer, sincedb_collection) + sincedb_collection.write("tail mode subscribe complete - shutting down") + end + + private + + def build_specific_processor(settings) + TailMode::Processor.new(settings) + end + end +end diff --git a/lib/filewatch/read_mode/handlers/base.rb b/lib/filewatch/read_mode/handlers/base.rb new file mode 100644 index 0000000..3a8680e --- /dev/null +++ b/lib/filewatch/read_mode/handlers/base.rb @@ -0,0 +1,81 @@ +# encoding: utf-8 +require "logstash/util/loggable" + +module FileWatch module ReadMode module Handlers + class Base + include LogStash::Util::Loggable + + attr_reader :sincedb_collection + + def initialize(sincedb_collection, observer, settings) + @settings = settings + @sincedb_collection = sincedb_collection + @observer = observer + end + + def handle(watched_file) + logger.debug("handling: #{watched_file.path}") + unless watched_file.has_listener? + watched_file.set_listener(@observer) + end + handle_specifically(watched_file) + end + + def handle_specifically(watched_file) + # some handlers don't need to define this method + end + + private + + def open_file(watched_file) + return true if watched_file.file_open? + logger.debug("opening #{watched_file.path}") + begin + watched_file.open + rescue + # don't emit this message too often. if a file that we can't + # read is changing a lot, we'll try to open it more often, and spam the logs. + now = Time.now.to_i + logger.warn("opening OPEN_WARN_INTERVAL is '#{OPEN_WARN_INTERVAL}'") + if watched_file.last_open_warning_at.nil? || now - watched_file.last_open_warning_at > OPEN_WARN_INTERVAL + logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}") + watched_file.last_open_warning_at = now + else + logger.debug("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}") + end + watched_file.watch # set it back to watch so we can try it again + end + if watched_file.file_open? + watched_file.listener.opened + true + else + false + end + end + + def add_or_update_sincedb_collection(watched_file) + sincedb_value = @sincedb_collection.find(watched_file) + if sincedb_value.nil? 
+ add_new_value_sincedb_collection(watched_file) + elsif sincedb_value.watched_file == watched_file + update_existing_sincedb_collection_value(watched_file, sincedb_value) + else + logger.warn? && logger.warn("mismatch on sincedb_value.watched_file, this should have been handled by Discoverer") + end + watched_file.initial_completed + end + + def update_existing_sincedb_collection_value(watched_file, sincedb_value) + logger.debug("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}") + # sincedb_value is the source of truth + watched_file.update_bytes_read(sincedb_value.position) + end + + def add_new_value_sincedb_collection(watched_file) + sincedb_value = SincedbValue.new(0) + sincedb_value.set_watched_file(watched_file) + logger.debug("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position) + sincedb_collection.set(watched_file.sincedb_key, sincedb_value) + end + end +end end end diff --git a/lib/filewatch/read_mode/handlers/read_file.rb b/lib/filewatch/read_mode/handlers/read_file.rb new file mode 100644 index 0000000..cff1a76 --- /dev/null +++ b/lib/filewatch/read_mode/handlers/read_file.rb @@ -0,0 +1,47 @@ +# encoding: utf-8 + +module FileWatch module ReadMode module Handlers + class ReadFile < Base + def handle_specifically(watched_file) + if open_file(watched_file) + add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key) + # if the `file_chunk_count` * `file_chunk_size` is less than the file size + # then this method will be executed multiple times + # and the seek is moved to just after a line boundary as recorded in the sincedb + # for each run - so we reset the buffer + watched_file.reset_buffer + watched_file.file_seek(watched_file.bytes_read) + changed = false + @settings.file_chunk_count.times do + begin + lines = watched_file.buffer_extract(watched_file.file_read(@settings.file_chunk_size)) + logger.warn("read_to_eof: no delimiter found in current chunk") if lines.empty? + changed = true + lines.each do |line| + watched_file.listener.accept(line) + sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size) + end + rescue EOFError + # flush the buffer now in case there is no final delimiter + line = watched_file.buffer.flush + watched_file.listener.accept(line) unless line.empty? 
+          watched_file.listener.eof
+          watched_file.file_close
+          sincedb_collection.unset_watched_file(watched_file)
+          watched_file.listener.deleted
+          watched_file.unwatch
+          break
+        rescue Errno::EWOULDBLOCK, Errno::EINTR
+          watched_file.listener.error
+          break
+        rescue => e
+          logger.error("read_to_eof: general error reading #{watched_file.path} - error: #{e.inspect}")
+          watched_file.listener.error
+          break
+        end
+      end
+      sincedb_collection.request_disk_flush if changed
+    end
+  end
+end end end
diff --git a/lib/filewatch/read_mode/handlers/read_zip_file.rb b/lib/filewatch/read_mode/handlers/read_zip_file.rb
new file mode 100644
index 0000000..75689fc
--- /dev/null
+++ b/lib/filewatch/read_mode/handlers/read_zip_file.rb
@@ -0,0 +1,57 @@
+# encoding: utf-8
+require 'java'
+java_import java.io.InputStream
+java_import java.io.InputStreamReader
+java_import java.io.FileInputStream
+java_import java.io.BufferedReader
+java_import java.util.zip.GZIPInputStream
+java_import java.util.zip.ZipException
+
+module FileWatch module ReadMode module Handlers
+  class ReadZipFile < Base
+    def handle_specifically(watched_file)
+      add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
+      # can't really stripe read a zip file, it's all or nothing.
+      watched_file.listener.opened
+      # what do we do about quit when we have just begun reading the zipped file (e.g. pipeline reloading)
+      # should we track lines read in the sincedb and
+      # fast forward through the lines until we reach unseen content?
+      # meaning that we can quit in the middle of a zip file
+      begin
+        file_stream = FileInputStream.new(watched_file.path)
+        gzip_stream = GZIPInputStream.new(file_stream)
+        decoder = InputStreamReader.new(gzip_stream, "UTF-8")
+        buffered = BufferedReader.new(decoder)
+        while (line = buffered.readLine(false))
+          watched_file.listener.accept(line)
+        end
+        watched_file.listener.eof
+      rescue ZipException => e
+        logger.error("Cannot decompress the gzip file at path: #{watched_file.path}")
+        watched_file.listener.error
+      else
+        sincedb_collection.store_last_read(watched_file.sincedb_key, watched_file.last_stat_size)
+        sincedb_collection.request_disk_flush
+        watched_file.listener.deleted
+        watched_file.unwatch
+      ensure
+        # rescue each close individually so all close attempts are tried
+        close_and_ignore_ioexception(buffered) unless buffered.nil?
+        close_and_ignore_ioexception(decoder) unless decoder.nil?
+        close_and_ignore_ioexception(gzip_stream) unless gzip_stream.nil?
+        close_and_ignore_ioexception(file_stream) unless file_stream.nil?
+      end
+      sincedb_collection.unset_watched_file(watched_file)
+    end
+
+    private
+
+    def close_and_ignore_ioexception(closeable)
+      begin
+        closeable.close
+      rescue Exception => e # IOException can be thrown by any of the Java classes that implement the Closeable interface.
+ logger.warn("Ignoring an IOException when closing an instance of #{closeable.class.name}", "exception" => e) + end + end + end +end end end diff --git a/lib/filewatch/read_mode/processor.rb b/lib/filewatch/read_mode/processor.rb new file mode 100644 index 0000000..5a8c615 --- /dev/null +++ b/lib/filewatch/read_mode/processor.rb @@ -0,0 +1,117 @@ +# encoding: utf-8 +require "logstash/util/loggable" + +require_relative "handlers/base" +require_relative "handlers/read_file" +require_relative "handlers/read_zip_file" + +module FileWatch module ReadMode + # Must handle + # :read_file + # :read_zip_file + class Processor + include LogStash::Util::Loggable + + attr_reader :watch, :deletable_filepaths + + def initialize(settings) + @settings = settings + @deletable_filepaths = [] + end + + def add_watch(watch) + @watch = watch + self + end + + def initialize_handlers(sincedb_collection, observer) + @read_file = Handlers::ReadFile.new(sincedb_collection, observer, @settings) + @read_zip_file = Handlers::ReadZipFile.new(sincedb_collection, observer, @settings) + end + + def read_file(watched_file) + @read_file.handle(watched_file) + end + + def read_zip_file(watched_file) + @read_zip_file.handle(watched_file) + end + + def process_closed(watched_files) + # do not process watched_files in the closed state. + end + + def process_ignored(watched_files) + # do not process watched_files in the ignored state. + end + + def process_watched(watched_files) + logger.debug("Watched processing") + # Handles watched_files in the watched state. + # for a slice of them: + # move to the active state + # should never have been active before + # how much of the max active window is available + to_take = @settings.max_active - watched_files.count{|wf| wf.active?} + if to_take > 0 + watched_files.select {|wf| wf.watched?}.take(to_take).each do |watched_file| + path = watched_file.path + begin + watched_file.restat + watched_file.activate + rescue Errno::ENOENT + common_deleted_reaction(watched_file, "Watched") + next + rescue => e + common_error_reaction(path, e, "Watched") + next + end + break if watch.quit? + end + else + now = Time.now.to_i + if (now - watch.lastwarn_max_files) > MAX_FILES_WARN_INTERVAL + waiting = watched_files.size - @settings.max_active + logger.warn(@settings.max_warn_msg + ", files yet to open: #{waiting}") + watch.lastwarn_max_files = now + end + end + end + + def process_active(watched_files) + logger.debug("Active processing") + # Handles watched_files in the active state. + watched_files.select {|wf| wf.active? }.each do |watched_file| + path = watched_file.path + begin + watched_file.restat + rescue Errno::ENOENT + common_deleted_reaction(watched_file, "Active") + next + rescue => e + common_error_reaction(path, e, "Active") + next + end + break if watch.quit? + + if watched_file.compressed? + read_zip_file(watched_file) + else + read_file(watched_file) + end + # handlers take care of closing and unwatching + end + end + + def common_deleted_reaction(watched_file, action) + # file has gone away or we can't read it anymore. 
+      watched_file.unwatch
+      deletable_filepaths << watched_file.path
+      logger.debug("#{action} - stat failed: #{watched_file.path}, removing from collection")
+    end
+
+    def common_error_reaction(path, error, action)
+      logger.error("#{action} - other error #{path}: (#{error.message}, #{error.backtrace.take(8).inspect})")
+    end
+  end
+end end
diff --git a/lib/filewatch/settings.rb b/lib/filewatch/settings.rb
new file mode 100644
index 0000000..5d7b61f
--- /dev/null
+++ b/lib/filewatch/settings.rb
@@ -0,0 +1,65 @@
+# encoding: utf-8
+
+module FileWatch
+  class Settings
+    attr_reader :delimiter, :close_older, :ignore_older, :delimiter_byte_size
+    attr_reader :max_active, :max_warn_msg, :lastwarn_max_files
+    attr_reader :sincedb_write_interval, :stat_interval, :discover_interval
+    attr_reader :exclude, :start_new_files_at, :file_chunk_count, :file_chunk_size
+    attr_reader :sincedb_path, :sincedb_expiry_duration
+    attr_reader :file_sort_by, :file_sort_direction
+
+    def self.from_options(opts)
+      new.add_options(opts)
+    end
+
+    def self.days_to_seconds(days)
+      (24 * 3600) * days.to_f
+    end
+
+    def initialize
+      defaults = {
+        :delimiter => "\n",
+        :file_chunk_size => FILE_READ_SIZE,
+        :max_active => 4095,
+        :file_chunk_count => FIXNUM_MAX,
+        :sincedb_clean_after => 14,
+        :exclude => [],
+        :stat_interval => 1,
+        :discover_interval => 5,
+        :file_sort_by => "last_modified",
+        :file_sort_direction => "asc",
+      }
+      @opts = {}
+      @lastwarn_max_files = 0
+      add_options(defaults)
+    end
+
+    def add_options(opts)
+      @opts.update(opts)
+      self.max_open_files = @opts[:max_active]
+      @delimiter = @opts[:delimiter]
+      @delimiter_byte_size = @delimiter.bytesize
+      @file_chunk_size = @opts[:file_chunk_size]
+      @close_older = @opts[:close_older]
+      @ignore_older = @opts[:ignore_older]
+      @sincedb_write_interval = @opts[:sincedb_write_interval]
+      @stat_interval = @opts[:stat_interval]
+      @discover_interval = @opts[:discover_interval]
+      @exclude = Array(@opts[:exclude])
+      @start_new_files_at = @opts[:start_new_files_at]
+      @file_chunk_count = @opts[:file_chunk_count]
+      @sincedb_path = @opts[:sincedb_path]
+      @sincedb_expiry_duration = self.class.days_to_seconds(@opts.fetch(:sincedb_clean_after, 14))
+      @file_sort_by = @opts[:file_sort_by]
+      @file_sort_direction = @opts[:file_sort_direction]
+      self
+    end
+
+    def max_open_files=(value)
+      val = value.to_i
+      val = 4095 if value.nil? || val <= 0
+      @max_warn_msg = "Reached open files limit: #{val}, set by the 'max_open_files' option or default"
+      @max_active = val
+    end
+  end
+end
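Note (reviewer sketch, not part of the diff): building a Settings instance, assuming the FileWatch constants from bootstrap.rb are loaded. Only the overridden keys change; everything else keeps the defaults above. The `:sincedb_path` here is a hypothetical value the file input would supply.

```ruby
settings = FileWatch::Settings.from_options(
  :sincedb_path => "/tmp/example.sincedb", # hypothetical path
  :exclude      => "*.gz",                 # coerced via Array() in add_options
  :max_active   => 8
)

settings.max_active                     # => 8
settings.delimiter_byte_size            # => 1, from the "\n" default
FileWatch::Settings.days_to_seconds(14) # => 1209600.0, the default sincedb expiry
```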
diff --git a/lib/filewatch/sincedb_collection.rb b/lib/filewatch/sincedb_collection.rb
new file mode 100644
index 0000000..6d5dba2
--- /dev/null
+++ b/lib/filewatch/sincedb_collection.rb
@@ -0,0 +1,217 @@
+# encoding: utf-8
+require "logstash/util/loggable"
+
+module FileWatch
+  # this KV collection has a watched_file storage_key (an InodeStruct) as the key
+  # and a SincedbValue as the value.
+  # the SincedbValues are built by reading the sincedb file.
+  class SincedbCollection
+    include LogStash::Util::Loggable
+
+    attr_reader :path
+    attr_writer :serializer
+
+    def initialize(settings)
+      @settings = settings
+      @sincedb_last_write = 0
+      @sincedb = {}
+      @serializer = SincedbRecordSerializer.new(@settings.sincedb_expiry_duration)
+      @path = Pathname.new(@settings.sincedb_path)
+      @write_method = LogStash::Environment.windows? || @path.chardev? || @path.blockdev? ?
+          method(:non_atomic_write) : method(:atomic_write)
+      @full_path = @path.to_path
+      FileUtils.touch(@full_path)
+    end
+
+    def request_disk_flush
+      now = Time.now.to_i
+      delta = now - @sincedb_last_write
+      if delta >= @settings.sincedb_write_interval
+        logger.debug("writing sincedb (delta since last write = #{delta})")
+        sincedb_write(now)
+      end
+    end
+
+    def write(reason=nil)
+      logger.debug("caller requested sincedb write (#{reason})")
+      sincedb_write
+    end
+
+    def open
+      @time_sdb_opened = Time.now.to_f
+      begin
+        path.open do |file|
+          logger.debug("open: reading from #{path}")
+          @serializer.deserialize(file) do |key, value|
+            logger.debug("open: importing ... '#{key}' => '#{value}'")
+            set_key_value(key, value)
+          end
+        end
+        logger.debug("open: count of keys read: #{@sincedb.keys.size}")
+      rescue => e
+        # No existing sincedb to load
+        logger.debug("open: error: #{path}: #{e.inspect}")
+      end
+
+    end
+
+    def associate(watched_file)
+      logger.debug("associate: finding: #{watched_file.path}")
+      sincedb_value = find(watched_file)
+      if sincedb_value.nil?
+        # sincedb has no record of this inode
+        # and due to the window handling of many files
+        # this file may not be opened in this session.
+        # a new value will be added when the file is opened
+        return
+      end
+      if sincedb_value.watched_file.nil?
+        # not associated
+        if sincedb_value.path_in_sincedb.nil?
+          # old v1 record, assume it's the same file
+          handle_association(sincedb_value, watched_file)
+          return
+        end
+        if sincedb_value.path_in_sincedb == watched_file.path
+          # the path on disk is the same as discovered path
+          # and the inode is the same.
+          handle_association(sincedb_value, watched_file)
+          return
+        end
+        # the path on disk is different from discovered unassociated path
+        # but they have the same key (inode)
+        # treat as a new file, a new value will be added when the file is opened
+        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
+        sincedb_value.clear_watched_file
+        delete(watched_file.sincedb_key)
+        return
+      end
+      if sincedb_value.watched_file.equal?(watched_file) # pointer equals
+        logger.debug("associate: already associated - #{sincedb_value}, for path: #{watched_file.path}")
+        return
+      end
+      # sincedb_value.watched_file is not the discovered watched_file but they have the same key (inode)
+      # this means that the filename was changed during this session.
+      # log out the history of the old sincedb_value and remove it
+      # a new value will be added when the file is opened
+      # TODO notify about done-ness of old sincedb_value and watched_file
+      old_watched_file = sincedb_value.watched_file
+      sincedb_value.clear_watched_file
+      if logger.debug?
+        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
+        logger.debug("associate: matched but allocated to another - old watched_file history - #{old_watched_file.recent_state_history.join(', ')}")
+        logger.debug("associate: matched but allocated to another - DELETING value at key `#{old_watched_file.sincedb_key}`")
+      end
+      delete(old_watched_file.sincedb_key)
+    end
+ logger.debug("associate: matched but allocated to another - #{sincedb_value}") + logger.debug("associate: matched but allocated to another - old watched_file history - #{old_watched_file.recent_state_history.join(', ')}") + logger.debug("associate: matched but allocated to another - DELETING value at key `#{old_watched_file.sincedb_key}`") + end + delete(old_watched_file.sincedb_key) + end + + def find(watched_file) + get(watched_file.sincedb_key).tap do |obj| + logger.debug("find for path: #{watched_file.path}, found: '#{!obj.nil?}'") + end + end + + def member?(key) + @sincedb.member?(key) + end + + def get(key) + @sincedb[key] + end + + def delete(key) + @sincedb.delete(key) + end + + def last_read(key) + @sincedb[key].position + end + + def rewind(key) + @sincedb[key].update_position(0) + end + + def store_last_read(key, last_read) + @sincedb[key].update_position(last_read) + end + + def increment(key, amount) + @sincedb[key].increment_position(amount) + end + + def set_watched_file(key, watched_file) + @sincedb[key].set_watched_file(watched_file) + end + + def unset_watched_file(watched_file) + return unless member?(watched_file.sincedb_key) + get(watched_file.sincedb_key).unset_watched_file + end + + def clear + @sincedb.clear + end + + def keys + @sincedb.keys + end + + def set(key, value) + @sincedb[key] = value + value + end + + def watched_file_unset?(key) + return false unless member?(key) + get(key).watched_file.nil? + end + + private + + def handle_association(sincedb_value, watched_file) + watched_file.update_bytes_read(sincedb_value.position) + sincedb_value.set_watched_file(watched_file) + watched_file.initial_completed + watched_file.ignore if watched_file.all_read? + end + + def set_key_value(key, value) + if @time_sdb_opened < value.last_changed_at_expires(@settings.sincedb_expiry_duration) + logger.debug("open: setting #{key.inspect} to #{value.inspect}") + set(key, value) + else + logger.debug("open: record has expired, skipping: #{key.inspect} #{value.inspect}") + end + end + + def sincedb_write(time = Time.now.to_i) + logger.debug("sincedb_write: to: #{path}") + begin + @write_method.call + @serializer.expired_keys.each do |key| + @sincedb[key].unset_watched_file + delete(key) + logger.debug("sincedb_write: cleaned", "key" => "'#{key}'") + end + @sincedb_last_write = time + rescue Errno::EACCES + # no file handles free perhaps + # maybe it will work next time + logger.debug("sincedb_write: error: #{path}: #{$!}") + end + end + + def atomic_write + FileHelper.write_atomically(@full_path) do |io| + @serializer.serialize(@sincedb, io) + end + end + + def non_atomic_write + IO.open(@full_path, 0) do |io| + @serializer.serialize(@sincedb, io) + end + end + end +end diff --git a/lib/filewatch/sincedb_record_serializer.rb b/lib/filewatch/sincedb_record_serializer.rb new file mode 100644 index 0000000..81e8a34 --- /dev/null +++ b/lib/filewatch/sincedb_record_serializer.rb @@ -0,0 +1,70 @@ +# encoding: utf-8 + +module FileWatch + class SincedbRecordSerializer + + attr_reader :expired_keys + + def initialize(sincedb_value_expiry) + @sincedb_value_expiry = sincedb_value_expiry + @expired_keys = [] + end + + def update_sincedb_value_expiry_from_days(days) + @sincedb_value_expiry = Settings.days_to_seconds(days) + end + + def serialize(db, io, as_of = Time.now.to_f) + @expired_keys.clear + db.each do |key, value| + if as_of > value.last_changed_at_expires(@sincedb_value_expiry) + @expired_keys << key + next + end + io.write(serialize_record(key, value)) + end + end + + def 
diff --git a/lib/filewatch/sincedb_record_serializer.rb b/lib/filewatch/sincedb_record_serializer.rb
new file mode 100644
index 0000000..81e8a34
--- /dev/null
+++ b/lib/filewatch/sincedb_record_serializer.rb
@@ -0,0 +1,70 @@
+# encoding: utf-8
+
+module FileWatch
+  class SincedbRecordSerializer
+
+    attr_reader :expired_keys
+
+    def initialize(sincedb_value_expiry)
+      @sincedb_value_expiry = sincedb_value_expiry
+      @expired_keys = []
+    end
+
+    def update_sincedb_value_expiry_from_days(days)
+      @sincedb_value_expiry = Settings.days_to_seconds(days)
+    end
+
+    def serialize(db, io, as_of = Time.now.to_f)
+      @expired_keys.clear
+      db.each do |key, value|
+        if as_of > value.last_changed_at_expires(@sincedb_value_expiry)
+          @expired_keys << key
+          next
+        end
+        io.write(serialize_record(key, value))
+      end
+    end
+
+    def deserialize(io)
+      io.each do |record|
+        yield deserialize_record(record) #.tap{|val| STDERR.puts val}
+      end
+    end
+
+    def serialize_record(k, v)
+      # effectively InodeStruct#to_s + " " + SincedbValue#to_s
+      "#{k} #{v}\n"
+    end
+
+    def deserialize_record(record)
+      return [] if record.nil? || record.empty?
+      parts = record.split(" ")
+      parse_line_v2(parts) || parse_line_v1(parts)
+    end
+
+    private
+
+    def parse_line_v2(parts)
+      # new format e.g. 2977152 1 4 94 1519319662.852678 'path/to/file'
+      # do we want to store the last known state of the watched file too?
+      return false if parts.size < 5
+      inode_struct = prepare_inode_struct(parts)
+      pos = parts.shift.to_i
+      expires_at = Float(parts.shift) # this is like Time.now.to_f
+      path_in_sincedb = parts.shift
+      value = SincedbValue.new(pos, expires_at).add_path_in_sincedb(path_in_sincedb)
+      [inode_struct, value]
+    end
+
+    def parse_line_v1(parts)
+      # old inode based e.g. 2977152 1 4 94
+      inode_struct = prepare_inode_struct(parts)
+      pos = parts.shift.to_i
+      [inode_struct, SincedbValue.new(pos)]
+    end
+
+    def prepare_inode_struct(parts)
+      InodeStruct.new(parts.shift, *parts.shift(2).map(&:to_i))
+    end
+  end
+end
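Note (reviewer sketch, not part of the diff): a round-trip of one v2 sincedb record through the serializer above, assuming the FileWatch requires from bootstrap.rb are loaded (the inode numbers and path are hypothetical):

```ruby
ser = FileWatch::SincedbRecordSerializer.new(FileWatch::Settings.days_to_seconds(14))

key, value = ser.deserialize_record("2977152 1 4 94 1519319662.852678 /var/log/app.log")
key.to_s       # => "2977152 1 4"
value.position # => 94

ser.serialize_record(key, value)
# => "2977152 1 4 94 1519319662.852678 /var/log/app.log\n"
```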
diff --git a/lib/filewatch/sincedb_value.rb b/lib/filewatch/sincedb_value.rb
new file mode 100644
index 0000000..d5fa921
--- /dev/null
+++ b/lib/filewatch/sincedb_value.rb
@@ -0,0 +1,87 @@
+# encoding: utf-8
+
+module FileWatch
+  # Tracks the position and expiry of the offset of a file-of-interest
+  class SincedbValue
+    attr_reader :last_changed_at, :watched_file, :path_in_sincedb
+
+    def initialize(position, last_changed_at = nil, watched_file = nil)
+      @position = position # this is the value read from disk
+      @last_changed_at = last_changed_at
+      @watched_file = watched_file
+      touch if @last_changed_at.nil? || @last_changed_at.zero?
+    end
+
+    def add_path_in_sincedb(path)
+      @path_in_sincedb = path # can be nil
+      self
+    end
+
+    def last_changed_at_expires(duration)
+      @last_changed_at + duration
+    end
+
+    def position
+      # either the value from disk or the current wf position
+      @watched_file.nil? ? @position : @watched_file.bytes_read
+    end
+
+    def update_position(pos)
+      touch
+      if @watched_file.nil?
+        @position = pos
+      else
+        @watched_file.update_bytes_read(pos)
+      end
+    end
+
+    def increment_position(pos)
+      touch
+      if watched_file.nil?
+        @position += pos
+      else
+        @watched_file.increment_bytes_read(pos)
+      end
+    end
+
+    def set_watched_file(watched_file)
+      touch
+      @watched_file = watched_file
+    end
+
+    def touch
+      @last_changed_at = Time.now.to_f
+    end
+
+    def to_s
+      # consider serializing the watched_file state as well
+      "#{position} #{last_changed_at}".tap do |s|
+        if @watched_file.nil?
+          s.concat(" ").concat(@path_in_sincedb) unless @path_in_sincedb.nil?
+        else
+          s.concat(" ").concat(@watched_file.path)
+        end
+      end
+    end
+
+    def clear_watched_file
+      @watched_file = nil
+    end
+
+    def unset_watched_file
+      # cache the position
+      # we don't cache the path here because we know we are done with this file.
+      # either via the `delete` handling
+      # or when read mode is done with a file.
+      # in the case of `delete` if the file was renamed then @watched_file is the
+      # watched_file of the previous path and the new path will be discovered and
+      # it should have the same inode as before.
+      # The key from the new watched_file should then locate this entry and we
+      # can resume from the cached position
+      return if @watched_file.nil?
+      wf = @watched_file
+      @watched_file = nil
+      @position = wf.bytes_read
+    end
+  end
+end
diff --git a/lib/filewatch/tail_mode/handlers/base.rb b/lib/filewatch/tail_mode/handlers/base.rb
new file mode 100644
index 0000000..c8bc7a0
--- /dev/null
+++ b/lib/filewatch/tail_mode/handlers/base.rb
@@ -0,0 +1,124 @@
+# encoding: utf-8
+require "logstash/util/loggable"
+
+module FileWatch module TailMode module Handlers
+  class Base
+    include LogStash::Util::Loggable
+    attr_reader :sincedb_collection
+
+    def initialize(sincedb_collection, observer, settings)
+      @settings = settings
+      @sincedb_collection = sincedb_collection
+      @observer = observer
+    end
+
+    def handle(watched_file)
+      logger.debug("handling: #{watched_file.path}")
+      unless watched_file.has_listener?
+        watched_file.set_listener(@observer)
+      end
+      handle_specifically(watched_file)
+    end
+
+    def handle_specifically(watched_file)
+      # some handlers don't need to define this method
+    end
+
+    def update_existing_specifically(watched_file, sincedb_value)
+      # when a handler subclass does not implement this then do nothing
+    end
+
+    private
+
+    def read_to_eof(watched_file)
+      changed = false
+      # from a real config (has 102 file inputs)
+      # -- This cfg creates a file input for every log file to create a dedicated file pointer and read all files simultaneously
+      # -- If we put all log files in one file input glob we will have indexing delay, because Logstash waits until the first file becomes EOF
+      # by allowing the user to specify a combo of `file_chunk_count` X `file_chunk_size`...
+      # we enable the pseudo parallel processing of each file.
+      # user also has the option to specify a low `stat_interval` and a very high `discover_interval` to respond
+      # quicker to changing files and not allowing too much content to build up before reading it.
+      @settings.file_chunk_count.times do
+        begin
+          data = watched_file.file_read(@settings.file_chunk_size)
+          lines = watched_file.buffer_extract(data)
+          logger.warn("read_to_eof: no delimiter found in current chunk") if lines.empty?
+          changed = true
+          lines.each do |line|
+            watched_file.listener.accept(line)
+            sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size)
+          end
+        rescue EOFError
+          # it only makes sense to signal EOF in "read" mode not "tail"
+          break
+        rescue Errno::EWOULDBLOCK, Errno::EINTR
+          watched_file.listener.error
+          break
+        rescue => e
+          logger.error("read_to_eof: general error reading #{watched_file.path}", "error" => e.inspect, "backtrace" => e.backtrace.take(4))
+          watched_file.listener.error
+          break
+        end
+      end
+      sincedb_collection.request_disk_flush if changed
+    end
+
+    def open_file(watched_file)
+      return true if watched_file.file_open?
+      logger.debug("opening #{watched_file.path}")
+      begin
+        watched_file.open
+      rescue
+        # don't emit this message too often. if a file that we can't
+        # read is changing a lot, we'll try to open it more often, and spam the logs.
+        now = Time.now.to_i
+        logger.warn("open_file OPEN_WARN_INTERVAL is '#{OPEN_WARN_INTERVAL}'")
+        if watched_file.last_open_warning_at.nil? || now - watched_file.last_open_warning_at > OPEN_WARN_INTERVAL
+          logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}")
+          watched_file.last_open_warning_at = now
+        else
+          logger.debug("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
+        end
+        watched_file.watch # set it back to watch so we can try it again
+      end
+      if watched_file.file_open?
+ watched_file.listener.opened + true + else + false + end + end + + def add_or_update_sincedb_collection(watched_file) + sincedb_value = @sincedb_collection.find(watched_file) + if sincedb_value.nil? + add_new_value_sincedb_collection(watched_file) + elsif sincedb_value.watched_file == watched_file + update_existing_sincedb_collection_value(watched_file, sincedb_value) + else + logger.warn? && logger.warn("mismatch on sincedb_value.watched_file, this should have been handled by Discoverer") + end + watched_file.initial_completed + end + + def update_existing_sincedb_collection_value(watched_file, sincedb_value) + logger.debug("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}") + update_existing_specifically(watched_file, sincedb_value) + end + + def add_new_value_sincedb_collection(watched_file) + sincedb_value = get_new_value_specifically(watched_file) + logger.debug("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position) + sincedb_collection.set(watched_file.sincedb_key, sincedb_value) + end + + def get_new_value_specifically(watched_file) + position = @settings.start_new_files_at == :beginning ? 0 : watched_file.last_stat_size + value = SincedbValue.new(position) + value.set_watched_file(watched_file) + watched_file.update_bytes_read(position) + value + end + end +end end end diff --git a/lib/filewatch/tail_mode/handlers/create.rb b/lib/filewatch/tail_mode/handlers/create.rb new file mode 100644 index 0000000..2b89c11 --- /dev/null +++ b/lib/filewatch/tail_mode/handlers/create.rb @@ -0,0 +1,17 @@ +# encoding: utf-8 + +module FileWatch module TailMode module Handlers + class Create < Base + def handle_specifically(watched_file) + if open_file(watched_file) + add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key) + end + end + + def update_existing_specifically(watched_file, sincedb_value) + # sincedb_value is the source of truth + position = sincedb_value.position + watched_file.update_bytes_read(position) + end + end +end end end diff --git a/lib/filewatch/tail_mode/handlers/create_initial.rb b/lib/filewatch/tail_mode/handlers/create_initial.rb new file mode 100644 index 0000000..65c385b --- /dev/null +++ b/lib/filewatch/tail_mode/handlers/create_initial.rb @@ -0,0 +1,21 @@ +# encoding: utf-8 + +module FileWatch module TailMode module Handlers + class CreateInitial < Base + def handle_specifically(watched_file) + if open_file(watched_file) + add_or_update_sincedb_collection(watched_file) + end + end + + def update_existing_specifically(watched_file, sincedb_value) + position = watched_file.last_stat_size + if @settings.start_new_files_at == :beginning + position = 0 + end + logger.debug("update_existing_specifically - #{watched_file.path}: seeking to #{position}") + watched_file.update_bytes_read(position) + sincedb_value.update_position(position) + end + end +end end end diff --git a/lib/filewatch/tail_mode/handlers/delete.rb b/lib/filewatch/tail_mode/handlers/delete.rb new file mode 100644 index 0000000..39e4c74 --- /dev/null +++ b/lib/filewatch/tail_mode/handlers/delete.rb @@ -0,0 +1,11 @@ +# encoding: utf-8 + +module FileWatch module TailMode module Handlers + class Delete < Base + def handle_specifically(watched_file) + watched_file.listener.deleted + sincedb_collection.unset_watched_file(watched_file) + watched_file.file_close + end + end +end end end diff --git 
a/lib/filewatch/tail_mode/handlers/grow.rb b/lib/filewatch/tail_mode/handlers/grow.rb
new file mode 100644
index 0000000..826017e
--- /dev/null
+++ b/lib/filewatch/tail_mode/handlers/grow.rb
@@ -0,0 +1,11 @@
+# encoding: utf-8
+
+module FileWatch module TailMode module Handlers
+  class Grow < Base
+    def handle_specifically(watched_file)
+      watched_file.file_seek(watched_file.bytes_read)
+      logger.debug("reading to eof: #{watched_file.path}")
+      read_to_eof(watched_file)
+    end
+  end
+end end end
diff --git a/lib/filewatch/tail_mode/handlers/shrink.rb b/lib/filewatch/tail_mode/handlers/shrink.rb
new file mode 100644
index 0000000..9a7f0f0
--- /dev/null
+++ b/lib/filewatch/tail_mode/handlers/shrink.rb
@@ -0,0 +1,20 @@
+# encoding: utf-8
+
+module FileWatch module TailMode module Handlers
+  class Shrink < Base
+    def handle_specifically(watched_file)
+      add_or_update_sincedb_collection(watched_file)
+      watched_file.file_seek(watched_file.bytes_read)
+      logger.debug("reading to eof: #{watched_file.path}")
+      read_to_eof(watched_file)
+    end
+
+    def update_existing_specifically(watched_file, sincedb_value)
+      # we have a match but size is smaller
+      # set all to zero
+      logger.debug("update_existing_specifically: #{watched_file.path}: was truncated, seeking to beginning")
+      watched_file.update_bytes_read(0) if watched_file.bytes_read != 0
+      sincedb_value.update_position(0)
+    end
+  end
+end end end
diff --git a/lib/filewatch/tail_mode/handlers/timeout.rb b/lib/filewatch/tail_mode/handlers/timeout.rb
new file mode 100644
index 0000000..248eee0
--- /dev/null
+++ b/lib/filewatch/tail_mode/handlers/timeout.rb
@@ -0,0 +1,10 @@
+# encoding: utf-8
+
+module FileWatch module TailMode module Handlers
+  class Timeout < Base
+    def handle_specifically(watched_file)
+      watched_file.listener.timed_out
+      watched_file.file_close
+    end
+  end
+end end end
diff --git a/lib/filewatch/tail_mode/handlers/unignore.rb b/lib/filewatch/tail_mode/handlers/unignore.rb
new file mode 100644
index 0000000..07cace1
--- /dev/null
+++ b/lib/filewatch/tail_mode/handlers/unignore.rb
@@ -0,0 +1,37 @@
+# encoding: utf-8
+
+module FileWatch module TailMode module Handlers
+  class Unignore < Base
+    # a watched file can be put straight into the ignored state
+    # before any other handling has been done
+    # at a minimum we create or associate a sincedb value
+    def handle_specifically(watched_file)
+      add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
+    end
+
+    def get_new_value_specifically(watched_file)
+      # for files initially ignored, their bytes_read was set to stat.size
+      # use this value, not the `start_new_files_at` setting, for the position
+      # logger.debug("get_new_value_specifically", "watched_file" => watched_file.inspect)
+      SincedbValue.new(watched_file.bytes_read).tap do |val|
+        val.set_watched_file(watched_file)
+      end
+    end
+
+    def update_existing_specifically(watched_file, sincedb_value)
+      # when this watched_file was ignored it had its bytes_read set to eof
+      # now the file has changed (watched_file.size_changed?)
+      # it has been put into the watched state so when it becomes active
+      # we will handle grow or shrink
+      # for now we seek to where we were before the file got ignored (grow)
+      # or to the start (shrink)
+      position = 0
+      if watched_file.shrunk?
+ watched_file.update_bytes_read(0) + else + position = watched_file.bytes_read + end + sincedb_value.update_position(position) + end + end +end end end diff --git a/lib/filewatch/tail_mode/processor.rb b/lib/filewatch/tail_mode/processor.rb new file mode 100644 index 0000000..8291280 --- /dev/null +++ b/lib/filewatch/tail_mode/processor.rb @@ -0,0 +1,209 @@ +# encoding: utf-8 +require "logstash/util/loggable" +require_relative "handlers/base" +require_relative "handlers/create_initial" +require_relative "handlers/create" +require_relative "handlers/delete" +require_relative "handlers/grow" +require_relative "handlers/shrink" +require_relative "handlers/timeout" +require_relative "handlers/unignore" + +module FileWatch module TailMode + # Must handle + # :create_initial - file is discovered and we have no record of it in the sincedb + # :create - file is discovered and we have seen it before in the sincedb + # :grow - file has more content + # :shrink - file has less content + # :delete - file can't be read + # :timeout - file is closable + # :unignore - file was ignored, but have now received new content + class Processor + include LogStash::Util::Loggable + + attr_reader :watch, :deletable_filepaths + + def initialize(settings) + @settings = settings + @deletable_filepaths = [] + end + + def add_watch(watch) + @watch = watch + self + end + + def initialize_handlers(sincedb_collection, observer) + @create_initial = Handlers::CreateInitial.new(sincedb_collection, observer, @settings) + @create = Handlers::Create.new(sincedb_collection, observer, @settings) + @grow = Handlers::Grow.new(sincedb_collection, observer, @settings) + @shrink = Handlers::Shrink.new(sincedb_collection, observer, @settings) + @delete = Handlers::Delete.new(sincedb_collection, observer, @settings) + @timeout = Handlers::Timeout.new(sincedb_collection, observer, @settings) + @unignore = Handlers::Unignore.new(sincedb_collection, observer, @settings) + end + + def create(watched_file) + @create.handle(watched_file) + end + + def create_initial(watched_file) + @create_initial.handle(watched_file) + end + + def grow(watched_file) + @grow.handle(watched_file) + end + + def shrink(watched_file) + @shrink.handle(watched_file) + end + + def delete(watched_file) + @delete.handle(watched_file) + end + + def timeout(watched_file) + @timeout.handle(watched_file) + end + + def unignore(watched_file) + @unignore.handle(watched_file) + end + + def process_closed(watched_files) + logger.debug("Closed processing") + # Handles watched_files in the closed state. + # if its size changed it is put into the watched state + watched_files.select {|wf| wf.closed? }.each do |watched_file| + path = watched_file.path + begin + watched_file.restat + if watched_file.size_changed? + # if the closed file changed, move it to the watched state + # not to active state because we want to respect the active files window. + watched_file.watch + end + rescue Errno::ENOENT + # file has gone away or we can't read it anymore. + common_deleted_reaction(watched_file, "Closed") + rescue => e + common_error_reaction(path, e, "Closed") + end + break if watch.quit? + end + end + + def process_ignored(watched_files) + logger.debug("Ignored processing") + # Handles watched_files in the ignored state. + # if its size changed: + # put it in the watched state + # invoke unignore + watched_files.select {|wf| wf.ignored? }.each do |watched_file| + path = watched_file.path + begin + watched_file.restat + if watched_file.size_changed? 
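Both `process_closed` and `process_ignored` follow the pattern visible here: restat, check `size_changed?`, and promote the file back to the `watched` state rather than straight to `active`, so the active-files window is respected. A toy version, assuming nothing beyond what the diff shows:

  WatchedFileStub = Struct.new(:state, :bytes_read, :size) do
    def size_changed?
      bytes_read != size
    end

    def watch
      self.state = :watched
    end
  end

  wf = WatchedFileStub.new(:closed, 10, 25)
  wf.watch if wf.size_changed?
  puts wf.state # => watched; process_watched will activate it later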
+ watched_file.watch + unignore(watched_file) + end + rescue Errno::ENOENT + # file has gone away or we can't read it anymore. + common_deleted_reaction(watched_file, "Ignored") + rescue => e + common_error_reaction(path, e, "Ignored") + end + break if watch.quit? + end + end + + def process_watched(watched_files) + logger.debug("Watched processing") + # Handles watched_files in the watched state. + # for a slice of them: + # move to the active state + # and we allow the block to open the file and create a sincedb collection record if needed + # some have never been active and some have + # those that were active before but are watched now were closed under constraint + + # how much of the max active window is available + to_take = @settings.max_active - watched_files.count{|wf| wf.active?} + if to_take > 0 + watched_files.select {|wf| wf.watched?}.take(to_take).each do |watched_file| + path = watched_file.path + begin + watched_file.restat + watched_file.activate + if watched_file.initial? + create_initial(watched_file) + else + create(watched_file) + end + rescue Errno::ENOENT + # file has gone away or we can't read it anymore. + common_deleted_reaction(watched_file, "Watched") + rescue => e + common_error_reaction(path, e, "Watched") + end + break if watch.quit? + end + else + now = Time.now.to_i + if (now - watch.lastwarn_max_files) > MAX_FILES_WARN_INTERVAL + waiting = watched_files.size - @settings.max_active + logger.warn(@settings.max_warn_msg + ", files yet to open: #{waiting}") + watch.lastwarn_max_files = now + end + end + end + + def process_active(watched_files) + logger.debug("Active processing") + # Handles watched_files in the active state. + # it has been read once - unless they were empty at the time + watched_files.select {|wf| wf.active? }.each do |watched_file| + path = watched_file.path + begin + watched_file.restat + rescue Errno::ENOENT + # file has gone away or we can't read it anymore. + common_deleted_reaction(watched_file, "Active") + next + rescue => e + common_error_reaction(path, e, "Active") + next + end + break if watch.quit? + if watched_file.grown? + logger.debug("Active - file grew: #{path}: new size is #{watched_file.last_stat_size}, old size #{watched_file.bytes_read}") + grow(watched_file) + elsif watched_file.shrunk? + # we don't update the size here, its updated when we actually read + logger.debug("Active - file shrunk #{path}: new size is #{watched_file.last_stat_size}, old size #{watched_file.bytes_read}") + shrink(watched_file) + else + # same size, do nothing + end + # can any active files be closed to make way for waiting files? + if watched_file.file_closable? + logger.debug("Watch each: active: file expired: #{path}") + timeout(watched_file) + watched_file.close + end + end + end + + def common_deleted_reaction(watched_file, action) + # file has gone away or we can't read it anymore. 
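The `to_take` arithmetic in `process_watched` above is the whole back-pressure mechanism: only as many watched files are activated as there are free slots in the active window. A reduced sketch, with hash literals standing in for watched files:

  def activatable_slice(files, max_active)
    to_take = max_active - files.count { |f| f[:state] == :active }
    return [] if to_take <= 0
    files.select { |f| f[:state] == :watched }.take(to_take)
  end

  files = [{ :state => :active }, { :state => :watched }, { :state => :watched }]
  p activatable_slice(files, 2).size # => 1, one free slot in the window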
+ watched_file.unwatch + delete(watched_file) + deletable_filepaths << watched_file.path + logger.debug("#{action} - stat failed: #{watched_file.path}, removing from collection") + end + + def common_error_reaction(path, error, action) + logger.error("#{action} - other error #{path}: (#{error.message}, #{error.backtrace.take(8).inspect})") + end + end +end end diff --git a/lib/filewatch/watch.rb b/lib/filewatch/watch.rb new file mode 100644 index 0000000..c544719 --- /dev/null +++ b/lib/filewatch/watch.rb @@ -0,0 +1,107 @@ +# encoding: utf-8 +require "logstash/util/loggable" + +module FileWatch + class Watch + include LogStash::Util::Loggable + + attr_accessor :lastwarn_max_files + attr_reader :discoverer, :watched_files_collection + + def initialize(discoverer, watched_files_collection, settings) + @settings = settings + # watch and iterate_on_state can be called from different threads. + @lock = Mutex.new + # we need to be threadsafe about the quit mutation + @quit = false + @quit_lock = Mutex.new + @lastwarn_max_files = 0 + @discoverer = discoverer + @watched_files_collection = watched_files_collection + end + + def add_processor(processor) + @processor = processor + @processor.add_watch(self) + self + end + + def watch(path) + synchronized do + @discoverer.add_path(path) + end + # don't return whatever @discoverer.add_path returns + return true + end + + def discover + synchronized do + @discoverer.discover + end + # don't return whatever @discoverer.discover returns + return true + end + + def subscribe(observer, sincedb_collection) + @processor.initialize_handlers(sincedb_collection, observer) + + glob = 0 + interval = @settings.discover_interval + reset_quit + until quit? + iterate_on_state + break if quit? + glob += 1 + if glob == interval + discover + glob = 0 + end + break if quit? + sleep(@settings.stat_interval) + end + @watched_files_collection.close_all + end # def subscribe + + # Read mode processor will handle watched_files in the closed, ignored, watched and active state + # differently from Tail mode - see the ReadMode::Processor and TailMode::Processor + def iterate_on_state + return if @watched_files_collection.empty? + synchronized do + begin + # creates this snapshot of watched_file values just once + watched_files = @watched_files_collection.values + @processor.process_closed(watched_files) + return if quit? + @processor.process_ignored(watched_files) + return if quit? + @processor.process_watched(watched_files) + return if quit? + @processor.process_active(watched_files) + ensure + @watched_files_collection.delete(@processor.deletable_filepaths) + @processor.deletable_filepaths.clear + end + end + end # def each + + def quit + @quit_lock.synchronize do + @quit = true + end + end # def quit + + def quit? 
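The `subscribe` loop above interleaves two cadences: every pass it processes state and sleeps `stat_interval`, and every `discover_interval`-th pass it re-globs for new files. A stripped-down sketch of just that scheduling, with the real work replaced by comments:

  def run_loop(stat_interval, discover_interval, passes)
    glob = 0
    passes.times do
      # iterate_on_state would run here (restat files, dispatch handlers)
      glob += 1
      if glob == discover_interval
        puts "discovering new files" # discover would re-glob the paths here
        glob = 0
      end
      sleep(stat_interval)
    end
  end

  run_loop(0.01, 4, 8) # prints "discovering new files" twice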
+ @quit_lock.synchronize { @quit } + end + + private + + def synchronized(&block) + @lock.synchronize { block.call } + end + + def reset_quit + @quit_lock.synchronize { @quit = false } + end + end +end diff --git a/lib/filewatch/watched_file.rb b/lib/filewatch/watched_file.rb new file mode 100644 index 0000000..dffdf29 --- /dev/null +++ b/lib/filewatch/watched_file.rb @@ -0,0 +1,226 @@ +# encoding: utf-8 + +module FileWatch + class WatchedFile + include InodeMixin # see bootstrap.rb at `if LogStash::Environment.windows?` + + attr_reader :bytes_read, :state, :file, :buffer, :recent_states + attr_reader :path, :filestat, :accessed_at, :modified_at, :pathname + attr_reader :sdb_key_v1, :last_stat_size, :listener + attr_accessor :last_open_warning_at + + # this class represents a file that has been discovered + def initialize(pathname, stat, settings) + @settings = settings + @pathname = Pathname.new(pathname) # given arg pathname might be a string or a Pathname object + @path = @pathname.to_path + @bytes_read = 0 + @last_stat_size = 0 + # the prepare_inode method is sourced from the mixed module above + @sdb_key_v1 = InodeStruct.new(*prepare_inode(path, stat)) + # initial as true means we have not associated this watched_file with a previous sincedb value yet. + # and we should read from the beginning if necessary + @initial = true + @recent_states = [] # keep last 8 states, managed in set_state + @state = :watched + set_stat(stat) # can change @last_stat_size + @listener = nil + @last_open_warning_at = nil + set_accessed_at + end + + def set_listener(observer) + @listener = observer.listener_for(@path) + end + + def unset_listener + @listener = nil + end + + def has_listener? + !@listener.nil? + end + + def sincedb_key + @sdb_key_v1 + end + + def initial_completed + @initial = false + end + + def set_accessed_at + @accessed_at = Time.now.to_f + end + + def initial? + @initial + end + + def compressed? + @path.end_with?('.gz','.gzip') + end + + def size_changed? + @last_stat_size != bytes_read + end + + def all_read? + @last_stat_size == bytes_read + end + + def open + file_add_opened(FileOpener.open(@path)) + end + + def file_add_opened(rubyfile) + @file = rubyfile + @buffer = BufferedTokenizer.new(@settings.delimiter) if @buffer.nil? + end + + def file_close + return if @file.nil? || @file.closed? + @file.close + @file = nil + end + + def file_seek(amount, whence = IO::SEEK_SET) + @file.sysseek(amount, whence) + end + + def file_read(amount) + set_accessed_at + @file.sysread(amount) + end + + def file_open? + !@file.nil? && !@file.closed? + end + + def reset_buffer + @buffer.flush + end + + def buffer_extract(data) + @buffer.extract(data) + end + + def increment_bytes_read(delta) + return if delta.nil? + @bytes_read += delta + end + + def update_bytes_read(total_bytes_read) + return if total_bytes_read.nil? + @bytes_read = total_bytes_read + end + + def update_path(_path) + @path = _path + end + + def update_stat(st) + set_stat(st) + end + + def activate + set_state :active + end + + def ignore + set_state :ignored + @bytes_read = @filestat.size + end + + def close + set_state :closed + end + + def watch + set_state :watched + end + + def unwatch + set_state :unwatched + end + + def active? + @state == :active + end + + def ignored? + @state == :ignored + end + + def closed? + @state == :closed + end + + def watched? + @state == :watched + end + + def unwatched? + @state == :unwatched + end + + def expiry_close_enabled? + !@settings.close_older.nil? + end + + def expiry_ignore_enabled? 
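`file_ignorable?` and `file_can_close?` further down deliberately compare float timestamps (the comment there notes JRuby converts through floats during Time subtraction anyway). The check they share reduces to this sketch:

  def expired?(last_touched_at, older_than_seconds)
    return false if older_than_seconds.nil? # feature disabled when unset
    (Time.now.to_f - last_touched_at) > older_than_seconds
  end

  stamp = Time.now.to_f - 120
  puts expired?(stamp, 60)  # => true, untouched longer than the threshold
  puts expired?(stamp, nil) # => false, close_older/ignore_older not set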
+ !@settings.ignore_older.nil? + end + + def shrunk? + @last_stat_size < @bytes_read + end + + def grown? + @last_stat_size > @bytes_read + end + + def restat + set_stat(pathname.stat) + end + + def set_state(value) + @recent_states.shift if @recent_states.size == 8 + @recent_states << @state + @state = value + end + + def recent_state_history + @recent_states + Array(@state) + end + + def file_closable? + file_can_close? && all_read? + end + + def file_ignorable? + return false unless expiry_ignore_enabled? + # (Time.now - stat.mtime) <- in jruby, this does int and float + # conversions before the subtraction and returns a float. + # so use all floats upfront + (Time.now.to_f - @modified_at) > @settings.ignore_older + end + + def file_can_close? + return false unless expiry_close_enabled? + (Time.now.to_f - @accessed_at) > @settings.close_older + end + + def to_s + inspect + end + + private + + def set_stat(stat) + @modified_at = stat.mtime.to_f + @last_stat_size = stat.size + @filestat = stat + end + end +end diff --git a/lib/filewatch/watched_files_collection.rb b/lib/filewatch/watched_files_collection.rb new file mode 100644 index 0000000..e341aff --- /dev/null +++ b/lib/filewatch/watched_files_collection.rb @@ -0,0 +1,84 @@ +# encoding: utf-8 +module FileWatch + class WatchedFilesCollection + + def initialize(settings) + @sort_by = settings.file_sort_by # "last_modified" | "path" + @sort_direction = settings.file_sort_direction # "asc" | "desc" + @sort_method = method("#{@sort_by}_#{@sort_direction}".to_sym) + @files = [] + @pointers = {} + end + + def add(watched_file) + @files << watched_file + @sort_method.call + end + + def delete(paths) + Array(paths).each do |f| + index = @pointers.delete(f) + @files.delete_at(index) + end + @sort_method.call + end + + def close_all + @files.each(&:file_close) + end + + def empty? + @files.empty? + end + + def keys + @pointers.keys + end + + def values + @files + end + + def watched_file_by_path(path) + index = @pointers[path] + return nil unless index + @files[index] + end + + private + + def last_modified_asc + @files.sort! do |left, right| + left.modified_at <=> right.modified_at + end + refresh_pointers + end + + def last_modified_desc + @files.sort! do |left, right| + right.modified_at <=> left.modified_at + end + refresh_pointers + end + + def path_asc + @files.sort! do |left, right| + left.path <=> right.path + end + refresh_pointers + end + + def path_desc + @files.sort! 
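The `WatchedFilesCollection` here pairs a sorted array with a path-to-index hash, so `watched_file_by_path` is O(1) while iteration order follows `file_sort_by`/`file_sort_direction`. A compact sketch of that pairing, using plain hashes instead of WatchedFile objects:

  files = [{ :path => "b.log", :mtime => 2 }, { :path => "a.log", :mtime => 1 }]
  files.sort_by! { |f| f[:mtime] } # last_modified + asc
  pointers = {}
  files.each_with_index { |f, i| pointers[f[:path]] = i } # refresh_pointers
  p files[pointers["b.log"]][:mtime] # => 2, direct lookup into sorted order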
do |left, right| + right.path <=> left.path + end + refresh_pointers + end + + def refresh_pointers + @files.each_with_index do |watched_file, index| + @pointers[watched_file.path] = index + end + end + end +end diff --git a/lib/filewatch/winhelper.rb b/lib/filewatch/winhelper.rb new file mode 100644 index 0000000..2440323 --- /dev/null +++ b/lib/filewatch/winhelper.rb @@ -0,0 +1,65 @@ +# encoding: utf-8 +require "ffi" + +module Winhelper + extend FFI::Library + + ffi_lib 'kernel32' + ffi_convention :stdcall + class FileTime < FFI::Struct + layout :lowDateTime, :uint, + :highDateTime, :uint + end + + #http://msdn.microsoft.com/en-us/library/windows/desktop/aa363788(v=vs.85).aspx + class FileInformation < FFI::Struct + layout :fileAttributes, :uint, #DWORD dwFileAttributes; + :createTime, FileTime, #FILETIME ftCreationTime; + :lastAccessTime, FileTime, #FILETIME ftLastAccessTime; + :lastWriteTime, FileTime, #FILETIME ftLastWriteTime; + :volumeSerialNumber, :uint, #DWORD dwVolumeSerialNumber; + :fileSizeHigh, :uint, #DWORD nFileSizeHigh; + :fileSizeLow, :uint, #DWORD nFileSizeLow; + :numberOfLinks, :uint, #DWORD nNumberOfLinks; + :fileIndexHigh, :uint, #DWORD nFileIndexHigh; + :fileIndexLow, :uint #DWORD nFileIndexLow; + end + + + #http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx + #HANDLE WINAPI CreateFile(_In_ LPCTSTR lpFileName,_In_ DWORD dwDesiredAccess,_In_ DWORD dwShareMode, + # _In_opt_ LPSECURITY_ATTRIBUTES lpSecurityAttributes,_In_ DWORD dwCreationDisposition, + # _In_ DWORD dwFlagsAndAttributes,_In_opt_ HANDLE hTemplateFile); + attach_function :GetOpenFileHandle, :CreateFileA, [:pointer, :uint, :uint, :pointer, :uint, :uint, :pointer], :pointer + + #http://msdn.microsoft.com/en-us/library/windows/desktop/aa364952(v=vs.85).aspx + #BOOL WINAPI GetFileInformationByHandle(_In_ HANDLE hFile,_Out_ LPBY_HANDLE_FILE_INFORMATION lpFileInformation); + attach_function :GetFileInformationByHandle, [:pointer, :pointer], :int + + attach_function :CloseHandle, [:pointer], :int + + + def self.GetWindowsUniqueFileIdentifier(path) + handle = GetOpenFileHandle(path, 0, 7, nil, 3, 128, nil) + fileInfo = Winhelper::FileInformation.new + success = GetFileInformationByHandle(handle, fileInfo) + CloseHandle(handle) + if success == 1 + #args = [ + # fileInfo[:fileAttributes], fileInfo[:volumeSerialNumber], fileInfo[:fileSizeHigh], fileInfo[:fileSizeLow], + # fileInfo[:numberOfLinks], fileInfo[:fileIndexHigh], fileInfo[:fileIndexLow] + # ] + #p "Information: %u %u %u %u %u %u %u " % args + #this is only guaranteed on NTFS, for ReFS on windows 2012, GetFileInformationByHandleEx should be used with FILE_ID_INFO, which returns a 128 bit identifier + return "#{fileInfo[:volumeSerialNumber]}-#{fileInfo[:fileIndexLow]}-#{fileInfo[:fileIndexHigh]}" + else + #p "cannot retrieve file information, returning path" + return path; + end + end +end + +#fileId = Winhelper.GetWindowsUniqueFileIdentifier('C:\inetpub\logs\LogFiles\W3SVC1\u_ex1fdsadfsadfasdf30612.log') +#p "FileId: " + fileId +#p "outside function, sleeping" +#sleep(10) diff --git a/lib/logstash/inputs/delete_completed_file_handler.rb b/lib/logstash/inputs/delete_completed_file_handler.rb new file mode 100644 index 0000000..c6a3e91 --- /dev/null +++ b/lib/logstash/inputs/delete_completed_file_handler.rb @@ -0,0 +1,9 @@ +# encoding: utf-8 + +module LogStash module Inputs + class DeleteCompletedFileHandler + def handle(path) + Pathname.new(path).unlink rescue nil + end + end +end end diff --git a/lib/logstash/inputs/file.rb 
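On Windows, `winhelper.rb` above replaces the inode triple with an identifier composed from GetFileInformationByHandle fields. The FFI plumbing is in the diff; the composition itself is just string formatting (the field values below are made up):

  def windows_identifier(volume_serial, index_low, index_high)
    # same "volumeSerial-indexLow-indexHigh" shape winhelper returns;
    # per its comment, only guaranteed stable on NTFS
    "#{volume_serial}-#{index_low}-#{index_high}"
  end

  puts windows_identifier(77_777, 1234, 0) # => "77777-1234-0"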
b/lib/logstash/inputs/file.rb index fec3aa7..cb81732 100644 --- a/lib/logstash/inputs/file.rb +++ b/lib/logstash/inputs/file.rb @@ -6,7 +6,12 @@ require "pathname" require "socket" # for Socket.gethostname require "fileutils" + require_relative "file/patch" +require_relative "file_listener" +require_relative "delete_completed_file_handler" +require_relative "log_completed_file_handler" +require "filewatch/bootstrap" # Stream events from files, normally by tailing them in a manner # similar to `tail -0F` but optionally reading them from the @@ -26,8 +31,8 @@ # # ==== Reading from remote network volumes # -# The file input is not tested on remote filesystems such as NFS, Samba, s3fs-fuse, etc. These -# remote filesystems typically have behaviors that are very different from local filesystems and +# The file input is not tested on remote filesystems such as NFS, Samba, s3fs-fuse, etc. These +# remote filesystems typically have behaviors that are very different from local filesystems and # are therefore unlikely to work correctly when used with the file input. # # ==== Tracking of current position in watched files @@ -77,8 +82,8 @@ # to the rotation and its reopening under the new name (an interval # determined by the `stat_interval` and `discover_interval` options) # will not get picked up. - -class LogStash::Inputs::File < LogStash::Inputs::Base +module LogStash module Inputs +class File < LogStash::Inputs::Base config_name "file" # The path(s) to the file(s) to use as an input. @@ -149,8 +154,8 @@ class LogStash::Inputs::File < LogStash::Inputs::Base # can be closed (allowing other files to be opened) but will be queued for # reopening when new data is detected. If reading, the file will be closed # after closed_older seconds from when the last bytes were read. - # The default is 1 hour - config :close_older, :validate => :number, :default => 1 * 60 * 60 + # By default, this option is disabled + config :close_older, :validate => :number # What is the maximum number of file_handles that this input consumes # at any one time. Use close_older to close some files if you need to @@ -160,10 +165,66 @@ class LogStash::Inputs::File < LogStash::Inputs::Base # The default of 4095 is set in filewatch. config :max_open_files, :validate => :number + # What mode do you want the file input to operate in. + # Tail a few files or read many content-complete files + # The default is tail + # If "read" is specified then the following other settings are ignored + # `start_position` (files are always read from the beginning) + # `delimiter` (files are assumed to use \n or \r (or both) as line endings) + # `close_older` (files are automatically 'closed' when EOF is reached) + # If "read" is specified then the following settings are heeded + # `ignore_older` (older files are not processed) + # "read" mode now supports gzip file processing + config :mode, :validate => [ "tail", "read"], :default => "tail" + + # When in 'read' mode, what action should be carried out when a file has been completely read. + # If 'delete' is specified then the file will be deleted. + # If 'log' is specified then the full path of the file is logged to the file specified + # in the `file_completed_log_path` setting. + config :file_completed_action, :validate => ["delete", "log", "log_and_delete"], :default => "delete" + + # Which file should the completely read file paths be appended to. + # Only specify this path to a file when `file_completed_action` is 'log' or 'log_and_delete'.
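Because "log_and_delete" contains both of the other action names as substrings, the register code further down can assemble the completed-file handler list with two plain `include?` checks. A reduced sketch of that dispatch:

  def handlers_for(action)
    handlers = []
    handlers << :log_handler    if action.include?("log")
    handlers << :delete_handler if action.include?("delete")
    handlers
  end

  p handlers_for("log_and_delete") # => [:log_handler, :delete_handler]
  p handlers_for("delete")         # => [:delete_handler]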
+ # IMPORTANT: this file is appended to only - it could become very large. You are responsible for file rotation. + config :file_completed_log_path, :validate => :string + + # The sincedb entry now has a last active timestamp associated with it. + # If no changes are detected in tracked files in the last N days, their sincedb + # tracking record will expire and not be persisted. + # This option protects against the well-known inode recycling problem. (add reference) + config :sincedb_clean_after, :validate => :number, :default => 14 # days + + # File content is read off disk in blocks or chunks, then using whatever the set delimiter + # is, lines are extracted from the chunk. Specify the size in bytes of each chunk. + # See `file_chunk_count` to see why and when to change this from the default. + # The default set internally is 32768 (32KB) + config :file_chunk_size, :validate => :number, :default => FileWatch::FILE_READ_SIZE + + # When combined with the `file_chunk_size`, this option sets how many chunks + # are read from each file before moving to the next active file. + # e.g. a `chunk_count` of 32 with the default `file_chunk_size` will process + # 1MB from each active file. See the option `max_open_files` for more info. + # The default set internally is very large, 4611686018427387903. By default + # the file is read to the end before moving to the next active file. + config :file_chunk_count, :validate => :number, :default => FileWatch::FIXNUM_MAX + + # Which attribute of a discovered file should be used to sort the discovered files. + # Files can be sorted by modified date or by full path, alphabetically. + # The default is `last_modified` + # Previously the processing order of the discovered files was OS dependent. + config :file_sort_by, :validate => ["last_modified", "path"], :default => "last_modified" + + # Choose between ascending and descending order when also choosing between + # `last_modified` and `path` file_sort_by options. + # If ingesting the newest data first is important then opt for last_modified + desc + # If ingesting the oldest data first is important then opt for last_modified + asc + # If you use a special naming convention for the file full paths then + # perhaps path + asc will help to achieve the goal of controlling the order of file ingestion. + config :file_sort_direction, :validate => ["asc", "desc"], :default => "asc" + public def register require "addressable/uri" - require "filewatch/tail" require "digest/md5" @logger.trace("Registering file input", :path => @path) @host = Socket.gethostname.force_encoding(Encoding::UTF_8) @@ -171,7 +232,7 @@ def register # won't in older versions of Logstash, then we need to set it to nil. settings = defined?(LogStash::SETTINGS) ? LogStash::SETTINGS : nil - @tail_config = { + @filewatch_config = { :exclude => @exclude, :stat_interval => @stat_interval, :discover_interval => @discover_interval, @@ -179,9 +240,16 @@ def register :delimiter => @delimiter, :ignore_older => @ignore_older, :close_older => @close_older, - :max_open_files => @max_open_files + :max_open_files => @max_open_files, + :sincedb_clean_after => @sincedb_clean_after, + :file_chunk_count => @file_chunk_count, + :file_chunk_size => @file_chunk_size, + :file_sort_by => @file_sort_by, + :file_sort_direction => @file_sort_direction, } + @completed_file_handlers = [] + @path.each do |path| if Pathname.new(path).relative? raise ArgumentError.new("File paths must be absolute, relative path specified: #{path}") @@ -189,132 +257,84 @@ def register end if @sincedb_path.nil?
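The interplay of the two chunk settings above is easiest to see as arithmetic: the bytes taken from one file before rotating to the next active file is simply chunk size times chunk count, matching the 1MB example in the docs:

  file_chunk_size  = 32_768 # 32KB, the documented internal default
  file_chunk_count = 32
  puts file_chunk_size * file_chunk_count # => 1048576, 1MB per turn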
- if settings - datapath = File.join(settings.get_value("path.data"), "plugins", "inputs", "file") - # Ensure that the filepath exists before writing, since it's deeply nested. - FileUtils::mkdir_p datapath - @sincedb_path = File.join(datapath, ".sincedb_" + Digest::MD5.hexdigest(@path.join(","))) + base_sincedb_path = build_sincedb_base_from_settings(settings) || build_sincedb_base_from_env + @sincedb_path = build_random_sincedb_filename(base_sincedb_path) + @logger.info('No sincedb_path set, generating one based on the "path" setting', :sincedb_path => @sincedb_path.to_s, :path => @path) + else + @sincedb_path = Pathname.new(@sincedb_path) + if @sincedb_path.directory? + raise ArgumentError.new("The \"sincedb_path\" argument must point to a file, received a directory: \"#{@sincedb_path}\"") end end - # This section is going to be deprecated eventually, as path.data will be - # the default, not an environment variable (SINCEDB_DIR or HOME) - if @sincedb_path.nil? # If it is _still_ nil... - if ENV["SINCEDB_DIR"].nil? && ENV["HOME"].nil? - @logger.error("No SINCEDB_DIR or HOME environment variable set, I don't know where " \ - "to keep track of the files I'm watching. Either set " \ - "HOME or SINCEDB_DIR in your environment, or set sincedb_path in " \ - "in your Logstash config for the file input with " \ - "path '#{@path.inspect}'") - raise # TODO(sissel): HOW DO I FAIL PROPERLY YO - end - - #pick SINCEDB_DIR if available, otherwise use HOME - sincedb_dir = ENV["SINCEDB_DIR"] || ENV["HOME"] - - # Join by ',' to make it easy for folks to know their own sincedb - # generated path (vs, say, inspecting the @path array) - @sincedb_path = File.join(sincedb_dir, ".sincedb_" + Digest::MD5.hexdigest(@path.join(","))) - - # Migrate any old .sincedb to the new file (this is for version <=1.1.1 compatibility) - old_sincedb = File.join(sincedb_dir, ".sincedb") - if File.exists?(old_sincedb) - @logger.debug("Renaming old ~/.sincedb to new one", :old => old_sincedb, - :new => @sincedb_path) - File.rename(old_sincedb, @sincedb_path) + @filewatch_config[:sincedb_path] = @sincedb_path + + @filewatch_config[:start_new_files_at] = @start_position.to_sym + + if @file_completed_action.include?('log') + if @file_completed_log_path.nil? + raise ArgumentError.new('The "file_completed_log_path" setting must be provided when the "file_completed_action" is set to "log" or "log_and_delete"') + else + @file_completed_log_path = Pathname.new(@file_completed_log_path) + unless @file_completed_log_path.exist? + begin + FileUtils.touch(@file_completed_log_path) + rescue + raise ArgumentError.new("The \"file_completed_log_path\" file can't be created: #{@file_completed_log_path}") + end + end end - - @logger.info("No sincedb_path set, generating one based on the file path", - :sincedb_path => @sincedb_path, :path => @path) - end - - if File.directory?(@sincedb_path) - raise ArgumentError.new("The \"sincedb_path\" argument must point to a file, received a directory: \"#{@sincedb_path}\"") end - @tail_config[:sincedb_path] = @sincedb_path - - if @start_position == "beginning" - @tail_config[:start_new_files_at] = :beginning + if tail_mode? 
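The generated sincedb filename being rebuilt here is deterministic: the configured paths joined with ',' and MD5-hexdigested, which is why changing the `path` array silently switches to a fresh sincedb file. Reproducing it takes a few lines (the data directory below is just an example value for path.data):

  require "digest/md5"
  require "pathname"

  paths = ["/var/log/*.log", "/tmp/app.log"]
  name  = ".sincedb_" + Digest::MD5.hexdigest(paths.join(","))
  puts Pathname.new("/usr/share/logstash/data").join("plugins", "inputs", "file", name)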
+ @watcher_class = FileWatch::ObservingTail + else + @watcher_class = FileWatch::ObservingRead + if @file_completed_action.include?('log') + @completed_file_handlers << LogCompletedFileHandler.new(@file_completed_log_path) + end + if @file_completed_action.include?('delete') + @completed_file_handlers << DeleteCompletedFileHandler.new + end end - @codec = LogStash::Codecs::IdentityMapCodec.new(@codec) end # def register - class ListenerTail - # use attr_reader to define noop methods - attr_reader :input, :path, :data - attr_reader :deleted, :created, :error, :eof - - # construct with upstream state - def initialize(path, input) - @path, @input = path, input - end - - def timed_out - input.codec.evict(path) - end - - def accept(data) - # and push transient data filled dup listener downstream - input.log_line_received(path, data) - input.codec.accept(dup_adding_state(data)) - end - - def process_event(event) - event.set("[@metadata][path]", path) - event.set("path", path) if !event.include?("path") - input.post_process_this(event) - end - - def add_state(data) - @data = data - self - end - - private - - # duplicate and add state for downstream - def dup_adding_state(line) - self.class.new(path, input).add_state(line) - end - end - - class FlushableListener < ListenerTail - attr_writer :path - end - def listener_for(path) # path is the identity - ListenerTail.new(path, self) + FileListener.new(path, self) end - def begin_tailing + def start_processing # if the pipeline restarts this input, # make sure previous files are closed stop - # use observer listener api - @tail = FileWatch::Tail.new_observing(@tail_config) - @tail.logger = @logger - @path.each { |path| @tail.tail(path) } + @watcher = @watcher_class.new(@filewatch_config) + @path.each { |path| @watcher.watch_this(path) } end def run(queue) - begin_tailing + start_processing @queue = queue - @tail.subscribe(self) + @watcher.subscribe(self) # halts here until quit is called exit_flush end # def run def post_process_this(event) event.set("[@metadata][host]", @host) - event.set("host", @host) if !event.include?("host") + event.set("host", @host) unless event.include?("host") decorate(event) @queue << event end + def handle_deletable_path(path) + return if tail_mode? + return if @completed_file_handlers.empty? + @completed_file_handlers.each { |handler| handler.handle(path) } + end + def log_line_received(path, line) - return if !@logger.debug? + return unless @logger.debug? @logger.debug("Received line", :path => path, :text => line) end @@ -322,14 +342,50 @@ def stop # in filewatch >= 0.6.7, quit will closes and forget all files # but it will write their last read positions to since_db # beforehand - if @tail + if @watcher @codec.close - @tail.quit + @watcher.quit end end private + def build_sincedb_base_from_settings(settings) + logstash_data_path = settings.get_value("path.data") + Pathname.new(logstash_data_path).join("plugins", "inputs", "file").tap do |path| + # Ensure that the filepath exists before writing, since it's deeply nested. + path.mkpath + end + end + + def build_sincedb_base_from_env + # This section is going to be deprecated eventually, as path.data will be + # the default, not an environment variable (SINCEDB_DIR or LOGSTASH_HOME) + if ENV["SINCEDB_DIR"].nil? && ENV["LOGSTASH_HOME"].nil? + @logger.error("No SINCEDB_DIR or LOGSTASH_HOME environment variable set, I don't know where " \ + "to keep track of the files I'm watching. 
Either set " \ + "LOGSTASH_HOME or SINCEDB_DIR in your environment, or set sincedb_path in " \ + "in your Logstash config for the file input with " \ + "path '#{@path.inspect}'") + raise ArgumentError.new('The "sincedb_path" setting was not given and the environment variables "SINCEDB_DIR" or "LOGSTASH_HOME" are not set so we cannot build a file path for the sincedb') + end + Pathname.new(ENV["SINCEDB_DIR"] || ENV["LOGSTASH_HOME"]) + end + + def build_random_sincedb_filename(pathname) + # Join by ',' to make it easy for folks to know their own sincedb + # generated path (vs, say, inspecting the @path array) + pathname.join(".sincedb_" + Digest::MD5.hexdigest(@path.join(","))) + end + + def tail_mode? + @mode == "tail" + end + + def read_mode? + !tail_mode? + end + def exit_flush listener = FlushableListener.new("none", self) if @codec.identity_count.zero? @@ -345,4 +401,4 @@ def exit_flush @codec.flush_mapped(listener) end end -end # class LogStash::Inputs::File +end end end# class LogStash::Inputs::File diff --git a/lib/logstash/inputs/file_listener.rb b/lib/logstash/inputs/file_listener.rb new file mode 100644 index 0000000..71ddd42 --- /dev/null +++ b/lib/logstash/inputs/file_listener.rb @@ -0,0 +1,61 @@ +# encoding: utf-8 + +module LogStash module Inputs + # As and when a new WatchedFile is processed FileWatch asks for an instance of this class for the + # file path of that WatchedFile. All subsequent callbacks are sent via this listener instance. + # The file is essentially a stream and the path is the identity of that stream. + class FileListener + attr_reader :input, :path, :data + # construct with link back to the input plugin instance. + def initialize(path, input) + @path, @input = path, input + @data = nil + end + + def opened + end + + def eof + end + + def error + end + + def timed_out + input.codec.evict(path) + end + + def deleted + input.codec.evict(path) + input.handle_deletable_path(path) + end + + def accept(data) + # and push transient data filled dup listener downstream + input.log_line_received(path, data) + input.codec.accept(dup_adding_state(data)) + end + + def process_event(event) + event.set("[@metadata][path]", path) + event.set("path", path) unless event.include?("path") + input.post_process_this(event) + end + + def add_state(data) + @data = data + self + end + + private + + # duplicate and add state for downstream + def dup_adding_state(line) + self.class.new(path, input).add_state(line) + end + end + + class FlushableListener < FileListener + attr_writer :path + end +end end diff --git a/lib/logstash/inputs/log_completed_file_handler.rb b/lib/logstash/inputs/log_completed_file_handler.rb new file mode 100644 index 0000000..5210ac7 --- /dev/null +++ b/lib/logstash/inputs/log_completed_file_handler.rb @@ -0,0 +1,13 @@ +# encoding: utf-8 + +module LogStash module Inputs + class LogCompletedFileHandler + def initialize(log_completed_file_path) + @log_completed_file_path = Pathname.new(log_completed_file_path) + end + + def handle(path) + @log_completed_file_path.open("a") { |fd| fd.puts(path) } + end + end +end end diff --git a/logstash-input-file.gemspec b/logstash-input-file.gemspec index 4316146..274c1cb 100644 --- a/logstash-input-file.gemspec +++ b/logstash-input-file.gemspec @@ -1,8 +1,8 @@ Gem::Specification.new do |s| s.name = 'logstash-input-file' - s.version = '4.0.5' - s.licenses = ['Apache License (2.0)'] + s.version = '5.0.0' + s.licenses = ['Apache-2.0'] s.summary = "Streams events from files" s.description = "This gem is a Logstash plugin required 
to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" s.authors = ["Elastic"] @@ -11,7 +11,7 @@ Gem::Specification.new do |s| s.require_paths = ["lib"] # Files - s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"] + s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "JAR_VERSION", "docs/**/*"] # Tests s.test_files = s.files.grep(%r{^(test|spec|features)/}) @@ -24,11 +24,12 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'logstash-codec-plain' s.add_runtime_dependency 'addressable' - s.add_runtime_dependency 'filewatch', ['>= 0.8.1', '~> 0.8'] s.add_runtime_dependency 'logstash-codec-multiline', ['~> 3.0'] s.add_development_dependency 'stud', ['~> 0.0.19'] s.add_development_dependency 'logstash-devutils' s.add_development_dependency 'logstash-codec-json' s.add_development_dependency 'rspec-sequencing' + s.add_development_dependency "rspec-wait" + s.add_development_dependency 'timecop' end diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 0000000..73158f0 --- /dev/null +++ b/settings.gradle @@ -0,0 +1 @@ +rootProject.name = 'filewatch' \ No newline at end of file diff --git a/spec/filewatch/buftok_spec.rb b/spec/filewatch/buftok_spec.rb new file mode 100644 index 0000000..17e7532 --- /dev/null +++ b/spec/filewatch/buftok_spec.rb @@ -0,0 +1,24 @@ +require_relative 'spec_helper' + +describe FileWatch::BufferedTokenizer do + + context "when using the default delimiter" do + it "splits the lines correctly" do + expect(subject.extract("hello\nworld\n")).to eq ["hello", "world"] + end + + it "holds partial lines back until a token is found" do + buffer = described_class.new + expect(buffer.extract("hello\nwor")).to eq ["hello"] + expect(buffer.extract("ld\n")).to eq ["world"] + end + end + + context "when passing a custom delimiter" do + subject { FileWatch::BufferedTokenizer.new("\r\n") } + + it "splits the lines correctly" do + expect(subject.extract("hello\r\nworld\r\n")).to eq ["hello", "world"] + end + end +end diff --git a/spec/filewatch/reading_spec.rb b/spec/filewatch/reading_spec.rb new file mode 100644 index 0000000..bb574d3 --- /dev/null +++ b/spec/filewatch/reading_spec.rb @@ -0,0 +1,128 @@ + +require 'stud/temporary' +require_relative 'spec_helper' +require 'filewatch/observing_read' + +module FileWatch + describe Watch do + before(:all) do + @thread_abort = Thread.abort_on_exception + Thread.abort_on_exception = true + end + + after(:all) do + Thread.abort_on_exception = @thread_abort + end + + let(:directory) { Stud::Temporary.directory } + let(:watch_dir) { ::File.join(directory, "*.log") } + let(:file_path) { ::File.join(directory, "1.log") } + let(:sincedb_path) { ::File.join(Stud::Temporary.directory, "reading.sdb") } + let(:stat_interval) { 0.1 } + let(:discover_interval) { 4 } + let(:start_new_files_at) { :end } # should be irrelevant for read mode + let(:opts) do + { + :stat_interval => stat_interval, :start_new_files_at => start_new_files_at, + :delimiter => "\n", :discover_interval => discover_interval, + :ignore_older => 3600, :sincedb_path => sincedb_path + } + end + let(:observer) { TestObserver.new } + let(:reading) { ObservingRead.new(opts) } + let(:actions) do + 
RSpec::Sequencing.run_after(0.45, "quit after a short time") do + reading.quit + end + end + + after do + FileUtils.rm_rf(directory) unless directory =~ /fixture/ + end + + context "when watching a directory with files" do + let(:directory) { Stud::Temporary.directory } + let(:watch_dir) { ::File.join(directory, "*.log") } + let(:file_path) { ::File.join(directory, "1.log") } + + it "the file is read" do + File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } + actions.activate + reading.watch_this(watch_dir) + reading.subscribe(observer) + expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"]) + end + end + + context "when watching a directory with files using striped reading" do + let(:directory) { Stud::Temporary.directory } + let(:watch_dir) { ::File.join(directory, "*.log") } + let(:file_path1) { ::File.join(directory, "1.log") } + let(:file_path2) { ::File.join(directory, "2.log") } + # use a chunk size that does not align with the line boundaries + let(:opts) { super.merge(:file_chunk_size => 10, :file_chunk_count => 1)} + let(:lines) { [] } + let(:observer) { TestObserver.new(lines) } + + it "the files are read seemingly in parallel" do + File.open(file_path1, "w") { |file| file.write("string1\nstring2\n") } + File.open(file_path2, "w") { |file| file.write("stringA\nstringB\n") } + actions.activate + reading.watch_this(watch_dir) + reading.subscribe(observer) + if lines.first == "stringA" + expect(lines).to eq(%w(stringA string1 stringB string2)) + else + expect(lines).to eq(%w(string1 stringA string2 stringB)) + end + end + end + + describe "reading fixtures" do + let(:directory) { FIXTURE_DIR } + + context "for an uncompressed file" do + let(:watch_dir) { ::File.join(directory, "unc*.log") } + let(:file_path) { ::File.join(directory, 'uncompressed.log') } + + it "the file is read" do + FileWatch.make_fixture_current(file_path) + actions.activate + reading.watch_this(watch_dir) + reading.subscribe(observer) + expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(observer.listener_for(file_path).lines.size).to eq(2) + end + end + + context "for another uncompressed file" do + let(:watch_dir) { ::File.join(directory, "invalid*.log") } + let(:file_path) { ::File.join(directory, 'invalid_utf8.gbk.log') } + + it "the file is read" do + FileWatch.make_fixture_current(file_path) + actions.activate + reading.watch_this(watch_dir) + reading.subscribe(observer) + expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(observer.listener_for(file_path).lines.size).to eq(2) + end + end + + context "for a compressed file" do + let(:watch_dir) { ::File.join(directory, "compressed.*.gz") } + let(:file_path) { ::File.join(directory, 'compressed.log.gz') } + + it "the file is read" do + FileWatch.make_fixture_current(file_path) + actions.activate + reading.watch_this(watch_dir) + reading.subscribe(observer) + expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :eof, :delete]) + expect(observer.listener_for(file_path).lines.size).to eq(2) + end + end + end + end +end diff --git a/spec/filewatch/sincedb_record_serializer_spec.rb b/spec/filewatch/sincedb_record_serializer_spec.rb new file mode 100644 index 0000000..7beaba9 --- /dev/null +++ b/spec/filewatch/sincedb_record_serializer_spec.rb @@ -0,0 +1,71 @@ +# encoding: utf-8 +require_relative 
'spec_helper' +require 'filewatch/settings' +require 'filewatch/sincedb_record_serializer' + +module FileWatch + describe SincedbRecordSerializer do + let(:opts) { Hash.new } + let(:io) { StringIO.new } + let(:db) { Hash.new } + + subject { described_class.new(Settings.days_to_seconds(14)) } + + context "deserialize from IO" do + it 'reads V1 records' do + io.write("5391299 1 4 12\n") + subject.deserialize(io) do |inode_struct, sincedb_value| + expect(inode_struct.inode).to eq("5391299") + expect(inode_struct.maj).to eq(1) + expect(inode_struct.min).to eq(4) + expect(sincedb_value.position).to eq(12) + end + end + + it 'reads V2 records from an IO object' do + now = Time.now.to_f + io.write("5391299 1 4 12 #{now} /a/path/to/1.log\n") + subject.deserialize(io) do |inode_struct, sincedb_value| + expect(inode_struct.inode).to eq("5391299") + expect(inode_struct.maj).to eq(1) + expect(inode_struct.min).to eq(4) + expect(sincedb_value.position).to eq(12) + expect(sincedb_value.last_changed_at).to eq(now) + expect(sincedb_value.path_in_sincedb).to eq("/a/path/to/1.log") + end + end + end + + context "serialize to IO" do + it "writes db entries" do + now = Time.now.to_f + inode_struct = InodeStruct.new("42424242", 2, 5) + sincedb_value = SincedbValue.new(42, now) + db[inode_struct] = sincedb_value + subject.serialize(db, io) + expect(io.string).to eq("42424242 2 5 42 #{now}\n") + end + + it "does not write expired db entries to an IO object" do + twelve_days_ago = Time.now.to_f - (12.0*24*3600) + sixteen_days_ago = twelve_days_ago - (4.0*24*3600) + db[InodeStruct.new("42424242", 2, 5)] = SincedbValue.new(42, twelve_days_ago) + db[InodeStruct.new("18181818", 1, 6)] = SincedbValue.new(99, sixteen_days_ago) + subject.serialize(db, io) + expect(io.string).to eq("42424242 2 5 42 #{twelve_days_ago}\n") + end + end + + context "given a non default `sincedb_clean_after`" do + it "does not write expired db entries to an IO object" do + subject.update_sincedb_value_expiry_from_days(2) + one_day_ago = Time.now.to_f - (1.0*24*3600) + three_days_ago = one_day_ago - (2.0*24*3600) + db[InodeStruct.new("42424242", 2, 5)] = SincedbValue.new(42, one_day_ago) + db[InodeStruct.new("18181818", 1, 6)] = SincedbValue.new(99, three_days_ago) + subject.serialize(db, io) + expect(io.string).to eq("42424242 2 5 42 #{one_day_ago}\n") + end + end + end +end \ No newline at end of file diff --git a/spec/filewatch/spec_helper.rb b/spec/filewatch/spec_helper.rb new file mode 100644 index 0000000..578cc46 --- /dev/null +++ b/spec/filewatch/spec_helper.rb @@ -0,0 +1,120 @@ +require "rspec_sequencing" +require 'rspec/wait' +require "logstash/devutils/rspec/spec_helper" +require "timecop" + +def formatted_puts(text) + cfg = RSpec.configuration + return unless cfg.formatters.first.is_a?( + RSpec::Core::Formatters::DocumentationFormatter) + txt = cfg.format_docstrings_block.call(text) + cfg.output_stream.puts " #{txt}" +end + +unless RSpec::Matchers.method_defined?(:receive_call_and_args) + RSpec::Matchers.define(:receive_call_and_args) do |m, args| + match do |actual| + actual.trace_for(m) == args + end + + failure_message do + "Expecting method #{m} to receive: #{args} but got: #{actual.trace_for(m)}" + end + end +end + +require 'filewatch/bootstrap' + +module FileWatch + + FIXTURE_DIR = File.join('spec', 'fixtures') + + def self.make_file_older(path, seconds) + time = Time.now.to_f - seconds + ::File.utime(time, time, path) + end + + def self.make_fixture_current(path, time = Time.now) + ::File.utime(time, time, path) + end + + 
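The serializer specs above pin down the on-disk format: a V1 record is `inode maj min position`, and V2 appends a float last-changed timestamp and, optionally, the path. A hedged parser sketch covering both shapes (not the plugin's own deserializer):

  def parse_sincedb_line(line)
    inode, maj, min, pos, changed_at, path = line.chomp.split(" ", 6)
    { :inode => inode, :maj => maj.to_i, :min => min.to_i,
      :position => pos.to_i,
      :last_changed_at => changed_at && changed_at.to_f, # nil for V1
      :path_in_sincedb => path }
  end

  p parse_sincedb_line("5391299 1 4 12")                            # V1
  p parse_sincedb_line("5391299 1 4 12 1526888400.0 /a/path/1.log") # V2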
class TracerBase + def initialize + @tracer = [] + end + + def trace_for(symbol) + params = @tracer.map {|k,v| k == symbol ? v : nil}.compact + params.empty? ? false : params + end + + def clear + @tracer.clear + end + end + + module NullCallable + def self.call + end + end + + class TestObserver + class Listener + attr_reader :path, :lines, :calls + + def initialize(path) + @path = path + @lines = [] + @calls = [] + end + + def add_lines(lines) + @lines = lines + self + end + + def accept(line) + @lines << line + @calls << :accept + end + + def deleted + @calls << :delete + end + + def opened + @calls << :open + end + + def error + @calls << :error + end + + def eof + @calls << :eof + end + + def timed_out + @calls << :timed_out + end + end + + attr_reader :listeners + + def initialize(combined_lines = nil) + listener_proc = if combined_lines.nil? + lambda{|k| Listener.new(k) } + else + lambda{|k| Listener.new(k).add_lines(combined_lines) } + end + @listeners = Hash.new {|hash, key| hash[key] = listener_proc.call(key) } + end + + def listener_for(path) + @listeners[path] + end + + def clear + @listeners.clear; end + end +end diff --git a/spec/filewatch/tailing_spec.rb b/spec/filewatch/tailing_spec.rb new file mode 100644 index 0000000..11f50eb --- /dev/null +++ b/spec/filewatch/tailing_spec.rb @@ -0,0 +1,440 @@ + +require 'stud/temporary' +require_relative 'spec_helper' +require 'filewatch/observing_tail' + +LogStash::Logging::Logger::configure_logging("WARN") +# LogStash::Logging::Logger::configure_logging("DEBUG") + +module FileWatch + describe Watch do + before(:all) do + @thread_abort = Thread.abort_on_exception + Thread.abort_on_exception = true + end + + after(:all) do + Thread.abort_on_exception = @thread_abort + end + + let(:directory) { Stud::Temporary.directory } + let(:watch_dir) { ::File.join(directory, "*.log") } + let(:file_path) { ::File.join(directory, "1.log") } + let(:max) { 4095 } + let(:stat_interval) { 0.1 } + let(:discover_interval) { 4 } + let(:start_new_files_at) { :beginning } + let(:sincedb_path) { ::File.join(directory, "tailing.sdb") } + let(:opts) do + { + :stat_interval => stat_interval, :start_new_files_at => start_new_files_at, :max_active => max, + :delimiter => "\n", :discover_interval => discover_interval, :sincedb_path => sincedb_path + } + end + let(:observer) { TestObserver.new } + let(:tailing) { ObservingTail.new(opts) } + + after do + FileUtils.rm_rf(directory) + end + + describe "max open files (set to 1)" do + let(:max) { 1 } + let(:file_path2) { File.join(directory, "2.log") } + let(:wait_before_quit) { 0.15 } + let(:stat_interval) { 0.01 } + let(:discover_interval) { 4 } + let(:actions) do + RSpec::Sequencing + .run_after(wait_before_quit, "quit after a short time") do + tailing.quit + end + end + + before do + ENV["FILEWATCH_MAX_FILES_WARN_INTERVAL"] = "0" + File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } + File.open(file_path2, "wb") { |file| file.write("lineA\nlineB\n") } + end + + context "when max_active is 1" do + + it "without close_older set, opens only 1 file" do + actions.activate + tailing.watch_this(watch_dir) + tailing.subscribe(observer) + expect(tailing.settings.max_active).to eq(max) + file1_calls = observer.listener_for(file_path).calls + file2_calls = observer.listener_for(file_path2).calls + # file glob order is OS dependent + if file1_calls.empty? 
+ expect(observer.listener_for(file_path2).lines).to eq(["lineA", "lineB"]) + expect(file2_calls).to eq([:open, :accept, :accept]) + else + expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"]) + expect(file1_calls).to eq([:open, :accept, :accept]) + expect(file2_calls).to be_empty + end + end + end + + context "when close_older is set" do + let(:wait_before_quit) { 0.8 } + let(:opts) { super.merge(:close_older => 0.2, :max_active => 1, :stat_interval => 0.1) } + it "opens both files" do + actions.activate + tailing.watch_this(watch_dir) + tailing.subscribe(observer) + expect(tailing.settings.max_active).to eq(1) + filelistener_1 = observer.listener_for(file_path) + filelistener_2 = observer.listener_for(file_path2) + expect(filelistener_2.calls).to eq([:open, :accept, :accept, :timed_out]) + expect(filelistener_2.lines).to eq(["lineA", "lineB"]) + expect(filelistener_1.calls).to eq([:open, :accept, :accept, :timed_out]) + expect(filelistener_1.lines).to eq(["line1", "line2"]) + end + end + end + + context "when watching a directory with files" do + let(:start_new_files_at) { :beginning } + let(:actions) do + RSpec::Sequencing.run_after(0.45, "quit after a short time") do + tailing.quit + end + end + + it "the file is read" do + File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } + actions.activate + tailing.watch_this(watch_dir) + tailing.subscribe(observer) + expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept]) + expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"]) + end + end + + context "when watching a directory without files and one is added" do + let(:start_new_files_at) { :beginning } + before do + tailing.watch_this(watch_dir) + RSpec::Sequencing + .run_after(0.25, "create file") do + File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } + end + .then_after(0.45, "quit after a short time") do + tailing.quit + end + end + + it "the file is read" do + tailing.subscribe(observer) + expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept]) + expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"]) + end + end + + describe "given a previously discovered file" do + # these tests rely on the fact that the 'filepath' does not exist on disk + # it simulates that the user deleted the file + # so when a stat is taken on the file an error is raised + let(:quit_after) { 0.2 } + let(:stat) { double("stat", :size => 100, :ctime => Time.now, :mtime => Time.now, :ino => 234567, :dev_major => 3, :dev_minor => 2) } + let(:watched_file) { WatchedFile.new(file_path, stat, tailing.settings) } + + before do + tailing.watch.watched_files_collection.add(watched_file) + watched_file.initial_completed + end + + context "when a close operation occurs" do + before { watched_file.close } + it "is removed from the watched_files_collection" do + expect(tailing.watch.watched_files_collection).not_to be_empty + RSpec::Sequencing.run_after(quit_after, "quit") { tailing.quit } + tailing.subscribe(observer) + expect(tailing.watch.watched_files_collection).to be_empty + expect(observer.listener_for(file_path).calls).to eq([:delete]) + end + end + + context "an ignore operation occurs" do + before { watched_file.ignore } + it "is removed from the watched_files_collection" do + RSpec::Sequencing.run_after(quit_after, "quit") { tailing.quit } + tailing.subscribe(observer) + expect(tailing.watch.watched_files_collection).to be_empty + expect(observer.listener_for(file_path).calls).to 
eq([:delete]) + end + end + + context "when subscribed and a watched file is no longer readable" do + before { watched_file.watch } + it "is removed from the watched_files_collection" do + RSpec::Sequencing.run_after(quit_after, "quit") { tailing.quit } + tailing.subscribe(observer) + expect(tailing.watch.watched_files_collection).to be_empty + expect(observer.listener_for(file_path).calls).to eq([:delete]) + end + end + + context "when subscribed and an active file is no longer readable" do + before { watched_file.activate } + it "is removed from the watched_files_collection" do + RSpec::Sequencing.run_after(quit_after, "quit") { tailing.quit } + tailing.subscribe(observer) + expect(tailing.watch.watched_files_collection).to be_empty + expect(observer.listener_for(file_path).calls).to eq([:delete]) + end + end + end + + context "when a processed file shrinks" do + let(:discover_interval) { 100 } + before do + RSpec::Sequencing + .run("create file") do + File.open(file_path, "wb") { |file| file.write("line1\nline2\nline3\nline4\n") } + end + .then_after(0.25, "start watching after files are written") do + tailing.watch_this(watch_dir) + end + .then_after(0.25, "truncate file and write new content") do + File.truncate(file_path, 0) + File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") } + end + .then_after(0.25, "quit after a short time") do + tailing.quit + end + end + + it "new changes to the shrunk file are read from the beginning" do + tailing.subscribe(observer) + expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :accept, :accept, :accept, :accept]) + expect(observer.listener_for(file_path).lines).to eq(["line1", "line2", "line3", "line4", "lineA", "lineB"]) + end + end + + context "when watching a directory with files and a file is renamed to not match glob" do + let(:new_file_path) { file_path + ".old" } + before do + RSpec::Sequencing + .run("create file") do + File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } + end + .then_after(0.25, "start watching after files are written") do + tailing.watch_this(watch_dir) + end + .then_after(0.55, "rename file") do + FileUtils.mv(file_path, new_file_path) + end + .then_after(0.55, "then write to renamed file") do + File.open(new_file_path, "ab") { |file| file.write("line3\nline4\n") } + end + .then_after(0.45, "quit after a short time") do + tailing.quit + end + end + + it "changes to the renamed file are not read" do + tailing.subscribe(observer) + expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :delete]) + expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"]) + expect(observer.listener_for(new_file_path).calls).to eq([]) + expect(observer.listener_for(new_file_path).lines).to eq([]) + end + end + + context "when watching a directory with files and a file is renamed to match glob" do + let(:new_file_path) { file_path + "2.log" } + let(:opts) { super.merge(:close_older => 0) } + before do + RSpec::Sequencing + .run("create file") do + File.open(file_path, "wb") { |file| file.write("line1\nline2\n") } + end + .then_after(0.15, "start watching after files are written") do + tailing.watch_this(watch_dir) + end + .then_after(0.25, "rename file") do + FileUtils.mv(file_path, new_file_path) + end + .then("then write to renamed file") do + File.open(new_file_path, "ab") { |file| file.write("line3\nline4\n") } + end + .then_after(0.55, "quit after a short time") do + tailing.quit + end + end + + it "the first set of lines are not re-read" do + 
+        tailing.subscribe(observer)
+        expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"])
+        expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out, :delete])
+        expect(observer.listener_for(new_file_path).lines).to eq(["line3", "line4"])
+        expect(observer.listener_for(new_file_path).calls).to eq([:open, :accept, :accept, :timed_out])
+      end
+    end
+
+    context "when watching a directory with files and data is appended" do
+      before do
+        RSpec::Sequencing
+          .run("create file") do
+            File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+          end
+          .then_after(0.25, "start watching after file is written") do
+            tailing.watch_this(watch_dir)
+          end
+          .then_after(0.45, "append more lines to the file") do
+            File.open(file_path, "ab") { |file| file.write("line3\nline4\n") }
+          end
+          .then_after(0.45, "quit after a short time") do
+            tailing.quit
+          end
+      end
+
+      it "appended lines are read after an EOF" do
+        tailing.subscribe(observer)
+        expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :accept, :accept])
+        expect(observer.listener_for(file_path).lines).to eq(["line1", "line2", "line3", "line4"])
+      end
+    end
+
+    context "when close older expiry is enabled" do
+      let(:opts) { super().merge(:close_older => 1) }
+      before do
+        RSpec::Sequencing
+          .run("create file") do
+            File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+          end
+          .then("start watching before file ages more than close_older") do
+            tailing.watch_this(watch_dir)
+          end
+          .then_after(2.1, "quit after allowing time to close the file") do
+            tailing.quit
+          end
+      end
+
+      it "lines are read and the file times out" do
+        tailing.subscribe(observer)
+        expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out])
+        expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"])
+      end
+    end
+
+    context "when close older expiry is enabled and after timeout the file is appended-to" do
+      let(:opts) { super().merge(:close_older => 1) }
+      before do
+        RSpec::Sequencing
+          .run("create file") do
+            File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+          end
+          .then("start watching before file ages more than close_older") do
+            tailing.watch_this(watch_dir)
+          end
+          .then_after(2.1, "append more lines to file after file ages more than close_older") do
+            File.open(file_path, "ab") { |file| file.write("line3\nline4\n") }
+          end
+          .then_after(2.1, "quit after allowing time to close the file") do
+            tailing.quit
+          end
+      end
+
+      it "all lines are read" do
+        tailing.subscribe(observer)
+        expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out, :open, :accept, :accept, :timed_out])
+        expect(observer.listener_for(file_path).lines).to eq(["line1", "line2", "line3", "line4"])
+      end
+    end
+
+    context "when ignore older expiry is enabled and all files are already expired" do
+      let(:opts) { super().merge(:ignore_older => 1) }
+      before do
+        RSpec::Sequencing
+          .run("create file older than ignore_older and watch") do
+            File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+            FileWatch.make_file_older(file_path, 15)
+            tailing.watch_this(watch_dir)
+          end
+          .then_after(1.1, "quit") do
+            tailing.quit
+          end
+      end
+
+      it "no files are read" do
+        tailing.subscribe(observer)
+        expect(observer.listener_for(file_path).calls).to eq([])
+        expect(observer.listener_for(file_path).lines).to eq([])
+      end
+    end
+
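+    # Reviewer note on the two expiry settings exercised below, as I read
+    # this patch's specs (not authoritative docs): ignore_older skips a
+    # discovered file whose mtime is more than N seconds in the past, so its
+    # listener receives no calls at all, while close_older releases an open
+    # file after N seconds without new data, surfacing as a :timed_out call.
+    # An ignored file can still be read later if fresh data arrives, as the
+    # final context shows. A hypothetical combination for illustration:
+    #   Settings.from_options(:ignore_older => 3600, :close_older => 5)
+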
+    context "when ignore_older is less than close_older and all files are not expired" do
+      let(:opts) { super().merge(:ignore_older => 1, :close_older => 1.5) }
+      before do
+        RSpec::Sequencing
+          .run("create file") do
+            File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+          end
+          .then("start watching before file age reaches ignore_older") do
+            tailing.watch_this(watch_dir)
+          end
+          .then_after(1.75, "quit after allowing time to close the file") do
+            tailing.quit
+          end
+      end
+
+      it "reads lines normally" do
+        tailing.subscribe(observer)
+        expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out])
+        expect(observer.listener_for(file_path).lines).to eq(["line1", "line2"])
+      end
+    end
+
+    context "when ignore_older is greater than close_older and all files are expired" do
+      let(:opts) { super().merge(:ignore_older => 10, :close_older => 1) }
+      before do
+        RSpec::Sequencing
+          .run("create file older than ignore_older and watch") do
+            File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+            FileWatch.make_file_older(file_path, 15)
+            tailing.watch_this(watch_dir)
+          end
+          .then_after(1.5, "quit after allowing time to check the files") do
+            tailing.quit
+          end
+      end
+
+      it "no files are read" do
+        tailing.subscribe(observer)
+        expect(observer.listener_for(file_path).calls).to eq([])
+        expect(observer.listener_for(file_path).lines).to eq([])
+      end
+    end
+
+    context "when ignore older and close older expiry is enabled and after timeout the file is appended-to" do
+      let(:opts) { super().merge(:ignore_older => 20, :close_older => 1) }
+      before do
+        RSpec::Sequencing
+          .run("create file older than ignore_older and watch") do
+            File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
+            FileWatch.make_file_older(file_path, 25)
+            tailing.watch_this(watch_dir)
+          end
+          .then_after(0.15, "append more lines to file after file ages more than ignore_older") do
+            File.open(file_path, "ab") { |file| file.write("line3\nline4\n") }
+          end
+          .then_after(1.25, "quit after allowing time to close the file") do
+            tailing.quit
+          end
+      end
+
+      it "reads the added lines only" do
+        tailing.subscribe(observer)
+        expect(observer.listener_for(file_path).lines).to eq(["line3", "line4"])
+        expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out])
+      end
+    end
+  end
+end
+
diff --git a/spec/filewatch/watched_file_spec.rb b/spec/filewatch/watched_file_spec.rb
new file mode 100644
index 0000000..69181e4
--- /dev/null
+++ b/spec/filewatch/watched_file_spec.rb
@@ -0,0 +1,38 @@
+require 'stud/temporary'
+require_relative 'spec_helper'
+
+module FileWatch
+  describe WatchedFile do
+    let(:pathname) { Pathname.new(__FILE__) }
+
+    context 'Given two instances of the same file' do
+      it 'their sincedb_keys should equate' do
+        wf_key1 = WatchedFile.new(pathname, pathname.stat, Settings.new).sincedb_key
+        hash_db = { wf_key1 => 42 }
+        wf_key2 = WatchedFile.new(pathname, pathname.stat, Settings.new).sincedb_key
+        expect(wf_key1).to eq(wf_key2)
+        expect(wf_key1).to eql(wf_key2)
+        expect(wf_key1.hash).to eq(wf_key2.hash)
+        expect(hash_db[wf_key2]).to eq(42)
+      end
+    end
+
+    context 'Given a barrage of state changes' do
+      it 'only the previous N state changes are remembered' do
+        watched_file = WatchedFile.new(pathname, pathname.stat, Settings.new)
+        watched_file.ignore
+        watched_file.watch
+        watched_file.activate
+        watched_file.watch
+        watched_file.close
+        watched_file.watch
+        watched_file.activate
+        watched_file.unwatch
+        watched_file.activate
+        watched_file.close
+        expect(watched_file.closed?).to be_truthy
+        expect(watched_file.recent_states).to eq([:watched, :active, :watched, :closed, :watched, :active, :unwatched, :active])
+      end
+    end
+  end
+end
diff --git a/spec/filewatch/watched_files_collection_spec.rb b/spec/filewatch/watched_files_collection_spec.rb
new file mode 100644
index 0000000..c8fb491
--- /dev/null
+++ b/spec/filewatch/watched_files_collection_spec.rb
@@ -0,0 +1,73 @@
+require_relative 'spec_helper'
+
+module FileWatch
+  describe WatchedFilesCollection do
+    let(:time) { Time.now }
+    let(:stat1) { double("stat1", :size => 98, :ctime => time - 30, :mtime => time - 30, :ino => 234567, :dev_major => 3, :dev_minor => 2) }
+    let(:stat2) { double("stat2", :size => 99, :ctime => time - 20, :mtime => time - 20, :ino => 234568, :dev_major => 3, :dev_minor => 2) }
+    let(:stat3) { double("stat3", :size => 100, :ctime => time, :mtime => time, :ino => 234569, :dev_major => 3, :dev_minor => 2) }
+    let(:wf1) { WatchedFile.new("/var/log/z.log", stat1, Settings.new) }
+    let(:wf2) { WatchedFile.new("/var/log/m.log", stat2, Settings.new) }
+    let(:wf3) { WatchedFile.new("/var/log/a.log", stat3, Settings.new) }
+
+    context "sort by last_modified in ascending order" do
+      let(:sort_by) { "last_modified" }
+      let(:sort_direction) { "asc" }
+
+      it "sorts earliest modified first" do
+        collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
+        collection.add(wf2)
+        expect(collection.values).to eq([wf2])
+        collection.add(wf3)
+        expect(collection.values).to eq([wf2, wf3])
+        collection.add(wf1)
+        expect(collection.values).to eq([wf1, wf2, wf3])
+      end
+    end
+
+    context "sort by path in ascending order" do
+      let(:sort_by) { "path" }
+      let(:sort_direction) { "asc" }
+
+      it "sorts path A-Z" do
+        collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
+        collection.add(wf2)
+        expect(collection.values).to eq([wf2])
+        collection.add(wf1)
+        expect(collection.values).to eq([wf2, wf1])
+        collection.add(wf3)
+        expect(collection.values).to eq([wf3, wf2, wf1])
+      end
+    end
+
+    context "sort by last_modified in descending order" do
+      let(:sort_by) { "last_modified" }
+      let(:sort_direction) { "desc" }
+
+      it "sorts latest modified first" do
+        collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
+        collection.add(wf2)
+        expect(collection.values).to eq([wf2])
+        collection.add(wf1)
+        expect(collection.values).to eq([wf2, wf1])
+        collection.add(wf3)
+        expect(collection.values).to eq([wf3, wf2, wf1])
+      end
+    end
+
+    context "sort by path in descending order" do
+      let(:sort_by) { "path" }
+      let(:sort_direction) { "desc" }
+
+      it "sorts path Z-A" do
+        collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
+        collection.add(wf2)
+        expect(collection.values).to eq([wf2])
+        collection.add(wf1)
+        expect(collection.values).to eq([wf1, wf2])
+        collection.add(wf3)
+        expect(collection.values).to eq([wf1, wf2, wf3])
+      end
+    end
+  end
+end
diff --git a/spec/filewatch/winhelper_spec.rb b/spec/filewatch/winhelper_spec.rb
new file mode 100644
index 0000000..f65e564
--- /dev/null
+++ b/spec/filewatch/winhelper_spec.rb
@@ -0,0 +1,22 @@
+require "stud/temporary"
+require "fileutils"
+
+if Gem.win_platform?
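+  # Reviewer note, an assumption drawn from how the spec below destructures
+  # the identifier rather than from vendor docs: Windows has no inode, so
+  # winhelper builds a surrogate identity for sincedb keys from the volume
+  # serial number plus the low and high halves of the file index, joined
+  # with "-"; the example only asserts that each part is non-zero.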
+  require "filewatch/winhelper"
+
+  describe Winhelper do
+    let(:path) { Stud::Temporary.file.path }
+
+    after do
+      FileUtils.rm_rf(path)
+    end
+
+    it "returns a unique file identifier" do
+      volume_serial, file_index_low, file_index_high = Winhelper.GetWindowsUniqueFileIdentifier(path).split("-").map(&:to_i)
+
+      expect(volume_serial).not_to eq(0)
+      expect(file_index_low).not_to eq(0)
+      expect(file_index_high).not_to eq(0)
+    end
+  end
+end
diff --git a/spec/fixtures/compressed.log.gz b/spec/fixtures/compressed.log.gz
new file mode 100644
index 0000000..e8e0cb9
Binary files /dev/null and b/spec/fixtures/compressed.log.gz differ
diff --git a/spec/fixtures/compressed.log.gzip b/spec/fixtures/compressed.log.gzip
new file mode 100644
index 0000000..e8e0cb9
Binary files /dev/null and b/spec/fixtures/compressed.log.gzip differ
diff --git a/spec/fixtures/invalid_utf8.gbk.log b/spec/fixtures/invalid_utf8.gbk.log
new file mode 100644
index 0000000..372387b
--- /dev/null
+++ b/spec/fixtures/invalid_utf8.gbk.log
@@ -0,0 +1,2 @@
+2015-01-01T02:52:45.866722Z no "GET http://www.logstash.com:80/utfmadness/¡Ö4od HTTP/1.1"
+
diff --git a/spec/fixtures/no-final-newline.log b/spec/fixtures/no-final-newline.log
new file mode 100644
index 0000000..0790930
--- /dev/null
+++ b/spec/fixtures/no-final-newline.log
@@ -0,0 +1,2 @@
+2010-03-12 23:51:20 SEA4 192.0.2.147 connect 2014 OK bfd8a98bee0840d9b871b7f6ade9908f rtmp://shqshne4jdp4b6.cloudfront.net/cfx/st​ key=value http://player.longtailvideo.com/player.swf http://www.longtailvideo.com/support/jw-player-setup-wizard?example=204 LNX%2010,0,32,18 - - - -
+2010-03-12 23:51:21 SEA4 192.0.2.222 play 3914 OK bfd8a98bee0840d9b871b7f6ade9908f rtmp://shqshne4jdp4b6.cloudfront.net/cfx/st​ key=value http://player.longtailvideo.com/player.swf http://www.longtailvideo.com/support/jw-player-setup-wizard?example=204 LNX%2010,0,32,18 myvideo p=2&q=4 flv 1
\ No newline at end of file
diff --git a/spec/fixtures/uncompressed.log b/spec/fixtures/uncompressed.log
new file mode 100644
index 0000000..d827e02
--- /dev/null
+++ b/spec/fixtures/uncompressed.log
@@ -0,0 +1,2 @@
+2010-03-12 23:51:20 SEA4 192.0.2.147 connect 2014 OK bfd8a98bee0840d9b871b7f6ade9908f rtmp://shqshne4jdp4b6.cloudfront.net/cfx/st​ key=value http://player.longtailvideo.com/player.swf http://www.longtailvideo.com/support/jw-player-setup-wizard?example=204 LNX%2010,0,32,18 - - - -
+2010-03-12 23:51:21 SEA4 192.0.2.222 play 3914 OK bfd8a98bee0840d9b871b7f6ade9908f rtmp://shqshne4jdp4b6.cloudfront.net/cfx/st​ key=value http://player.longtailvideo.com/player.swf http://www.longtailvideo.com/support/jw-player-setup-wizard?example=204 LNX%2010,0,32,18 myvideo p=2&q=4 flv 1
diff --git a/spec/spec_helper.rb b/spec/helpers/spec_helper.rb
similarity index 53%
rename from spec/spec_helper.rb
rename to spec/helpers/spec_helper.rb
index 850c275..dc725a6 100644
--- a/spec/spec_helper.rb
+++ b/spec/helpers/spec_helper.rb
@@ -4,41 +4,33 @@ require "rspec_sequencing"
 module FileInput
+
+  FIXTURE_DIR = File.join('spec', 'fixtures')
+
   def self.make_file_older(path, seconds)
     time = Time.now.to_f - seconds
-    File.utime(time, time, path)
+    ::File.utime(time, time, path)
+  end
+
+  def self.make_fixture_current(path, time = Time.now)
+    ::File.utime(time, time, path)
   end
-
+
   class TracerBase
-    def initialize() @tracer = []; end
+    def initialize
+      @tracer = []
+    end
 
     def trace_for(symbol)
       params = @tracer.map {|k,v| k == symbol ? v : nil}.compact
       params.empty? ? false : params
     end
 
-    def clear()
-      @tracer.clear()
+    def clear
+      @tracer.clear
     end
   end
 
-  class FileLogTracer < TracerBase
-    def warn(*args) @tracer.push [:warn, args]; end
-    def error(*args) @tracer.push [:error, args]; end
-    def debug(*args) @tracer.push [:debug, args]; end
-    def info(*args) @tracer.push [:info, args]; end
-
-    def info?() true; end
-    def debug?() true; end
-    def warn?() true; end
-    def error?() true; end
-  end
-
-  class ComponentTracer < TracerBase
-    def accept(*args) @tracer.push [:accept, args]; end
-    def deliver(*args) @tracer.push [:deliver, args]; end
-  end
-
   class CodecTracer < TracerBase
     def decode_accept(ctx, data, listener)
       @tracer.push [:decode_accept, [ctx, data]]
@@ -62,25 +54,6 @@ def clone
   end
 end
 
-unless Kernel.method_defined?(:pause_until)
-  module Kernel
-    def pause_until(nap = 5, &block)
-      sq = SizedQueue.new(1)
-      th1 = Thread.new(sq) {|q| sleep nap; q.push(false) }
-      th2 = Thread.new(sq) do |q|
-        success = false
-        iters = nap * 5 + 1
-        iters.times do
-          break if !!(success = block.call)
-          sleep(0.2)
-        end
-        q.push(success)
-      end
-      sq.pop
-    end
-  end
-end
-
 unless RSpec::Matchers.method_defined?(:receive_call_and_args)
   RSpec::Matchers.define(:receive_call_and_args) do |m, args|
     match do |actual|
diff --git a/spec/inputs/file_read_spec.rb b/spec/inputs/file_read_spec.rb
new file mode 100644
index 0000000..67fe206
--- /dev/null
+++ b/spec/inputs/file_read_spec.rb
@@ -0,0 +1,155 @@
+# encoding: utf-8
+
+require "helpers/spec_helper"
+require "logstash/inputs/file"
+
+# LogStash::Logging::Logger::configure_logging("DEBUG")
+
+require "tempfile"
+require "stud/temporary"
+require "logstash/codecs/multiline"
+
+FILE_DELIMITER = LogStash::Environment.windows? ? "\r\n" : "\n"
+
+describe LogStash::Inputs::File do
+  describe "'read' mode testing with input(conf) do |pipeline, queue|" do
+    it "should start at the beginning of an existing file and delete the file when done" do
+      tmpfile_path = Stud::Temporary.pathname
+      sincedb_path = Stud::Temporary.pathname
+
+      conf = <<-CONFIG
+        input {
+          file {
+            type => "blah"
+            path => "#{tmpfile_path}"
+            sincedb_path => "#{sincedb_path}"
+            delimiter => "#{FILE_DELIMITER}"
+            mode => "read"
+            file_completed_action => "delete"
+          }
+        }
+      CONFIG
+
+      File.open(tmpfile_path, "a") do |fd|
+        fd.puts("hello")
+        fd.puts("world")
+        fd.fsync
+      end
+
+      events = input(conf) do |pipeline, queue|
+        2.times.collect { queue.pop }
+      end
+
+      expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world")
+      expect(File.exist?(tmpfile_path)).to be_falsey
+    end
+  end
+
+  describe "reading fixtures" do
+    let(:fixture_dir) { Pathname.new(FileInput::FIXTURE_DIR).expand_path }
+
+    context "for a file without a final newline character" do
+      let(:file_path) { fixture_dir.join('no-final-newline.log') }
+
+      it "the file is read and the path is logged to the `file_completed_log_path` file" do
+        tmpfile_path = fixture_dir.join("no-f*.log")
+        sincedb_path = Stud::Temporary.pathname
+        FileInput.make_fixture_current(file_path.to_path)
+        log_completed_path = Stud::Temporary.pathname
+
+        conf = <<-CONFIG
+        input {
+          file {
+            type => "blah"
+            path => "#{tmpfile_path}"
+            sincedb_path => "#{sincedb_path}"
+            delimiter => "#{FILE_DELIMITER}"
+            mode => "read"
+            file_completed_action => "log"
+            file_completed_log_path => "#{log_completed_path}"
+          }
+        }
+        CONFIG
+
+        events = input(conf) do |pipeline, queue|
+          2.times.collect { queue.pop }
+        end
+
+        expect(events[0].get("message")).to start_with("2010-03-12 23:51")
+        expect(events[1].get("message")).to start_with("2010-03-12 23:51")
+        expect(IO.read(log_completed_path)).to eq(file_path.to_s + "\n")
+      end
+
+    end
+
+    context "for an uncompressed file" do
+      let(:file_path) { fixture_dir.join('uncompressed.log') }
+
+      it "the file is read and the path is logged to the `file_completed_log_path` file" do
+        tmpfile_path = fixture_dir.join("unc*.log")
+        sincedb_path = Stud::Temporary.pathname
+        FileInput.make_fixture_current(file_path.to_path)
+        log_completed_path = Stud::Temporary.pathname
+
+        conf = <<-CONFIG
+        input {
+          file {
+            type => "blah"
+            path => "#{tmpfile_path}"
+            sincedb_path => "#{sincedb_path}"
+            delimiter => "#{FILE_DELIMITER}"
+            mode => "read"
+            file_completed_action => "log"
+            file_completed_log_path => "#{log_completed_path}"
+          }
+        }
+        CONFIG
+
+        events = input(conf) do |pipeline, queue|
+          2.times.collect { queue.pop }
+        end
+
+        expect(events[0].get("message")).to start_with("2010-03-12 23:51")
+        expect(events[1].get("message")).to start_with("2010-03-12 23:51")
+        expect(IO.read(log_completed_path)).to eq(file_path.to_s + "\n")
+      end
+    end
+
+    context "for a compressed file" do
+      it "the file is read" do
+        tmpfile_path = fixture_dir.join("compressed.*.*")
+        sincedb_path = Stud::Temporary.pathname
+        file_path = fixture_dir.join('compressed.log.gz')
+        file_path2 = fixture_dir.join('compressed.log.gzip')
+        FileInput.make_fixture_current(file_path.to_path)
+        log_completed_path = Stud::Temporary.pathname
+
+        conf = <<-CONFIG
+        input {
+          file {
+            type => "blah"
+            path => "#{tmpfile_path}"
+            sincedb_path => "#{sincedb_path}"
+            delimiter => "#{FILE_DELIMITER}"
+            mode => "read"
+            file_completed_action => "log"
+            file_completed_log_path => "#{log_completed_path}"
+          }
+        }
+        CONFIG
+
+        events = input(conf) do |pipeline, queue|
+          4.times.collect { queue.pop }
+        end
+
+        expect(events[0].get("message")).to start_with("2010-03-12 23:51")
+        expect(events[1].get("message")).to start_with("2010-03-12 23:51")
+        expect(events[2].get("message")).to start_with("2010-03-12 23:51")
+        expect(events[3].get("message")).to start_with("2010-03-12 23:51")
+        logged_completions = IO.read(log_completed_path).split
+        expect(logged_completions.first).to match(/compressed\.log\.(gzip|gz)$/)
+        expect(logged_completions.last).to match(/compressed\.log\.(gzip|gz)$/)
+      end
+    end
+  end
+end
diff --git a/spec/inputs/file_spec.rb b/spec/inputs/file_tail_spec.rb
similarity index 79%
rename from spec/inputs/file_spec.rb
rename to spec/inputs/file_tail_spec.rb
index 85df88d..369906e 100644
--- a/spec/inputs/file_spec.rb
+++ b/spec/inputs/file_tail_spec.rb
@@ -1,14 +1,18 @@
 # encoding: utf-8
-require_relative "../spec_helper"
+
+require "helpers/spec_helper"
 require "logstash/inputs/file"
+
 require "tempfile"
 require "stud/temporary"
 require "logstash/codecs/multiline"
 
-FILE_DELIMITER = LogStash::Environment.windows? ? "\r\n" : "\n"
+# LogStash::Logging::Logger::configure_logging("DEBUG")
+
+TEST_FILE_DELIMITER = LogStash::Environment.windows? ? "\r\n" : "\n"
"\r\n" : "\n" describe LogStash::Inputs::File do - describe "testing with input(conf) do |pipeline, queue|" do + describe "'tail' mode testing with input(conf) do |pipeline, queue|" do it_behaves_like "an interruptible input plugin" do let(:config) do { @@ -29,7 +33,7 @@ path => "#{tmpfile_path}" start_position => "beginning" sincedb_path => "#{sincedb_path}" - delimiter => "#{FILE_DELIMITER}" + delimiter => "#{TEST_FILE_DELIMITER}" } } CONFIG @@ -43,12 +47,10 @@ events = input(conf) do |pipeline, queue| 2.times.collect { queue.pop } end - - insist { events[0].get("message") } == "hello" - insist { events[1].get("message") } == "world" + expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world") end - it "should restarts at the sincedb value" do + it "should restart at the sincedb value" do tmpfile_path = Stud::Temporary.pathname sincedb_path = Stud::Temporary.pathname @@ -59,7 +61,7 @@ path => "#{tmpfile_path}" start_position => "beginning" sincedb_path => "#{sincedb_path}" - delimiter => "#{FILE_DELIMITER}" + delimiter => "#{TEST_FILE_DELIMITER}" } } CONFIG @@ -73,8 +75,7 @@ 2.times.collect { queue.pop } end - insist { events[0].get("message") } == "hello3" - insist { events[1].get("message") } == "world3" + expect(events.map{|e| e.get("message")}).to contain_exactly("hello3", "world3") File.open(tmpfile_path, "a") do |fd| fd.puts("foo") @@ -86,10 +87,8 @@ events = input(conf) do |pipeline, queue| 3.times.collect { queue.pop } end - - insist { events[0].get("message") } == "foo" - insist { events[1].get("message") } == "bar" - insist { events[2].get("message") } == "baz" + messages = events.map{|e| e.get("message")} + expect(messages).to contain_exactly("foo", "bar", "baz") end it "should not overwrite existing path and host fields" do @@ -103,7 +102,7 @@ path => "#{tmpfile_path}" start_position => "beginning" sincedb_path => "#{sincedb_path}" - delimiter => "#{FILE_DELIMITER}" + delimiter => "#{TEST_FILE_DELIMITER}" codec => "json" } } @@ -119,13 +118,15 @@ 2.times.collect { queue.pop } end - insist { events[0].get("path") } == "my_path" - insist { events[0].get("host") } == "my_host" - insist { events[0].get("[@metadata][host]") } == "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}" + existing_path_index, added_path_index = "my_val" == events[0].get("my_field") ? 
 
-      insist { events[1].get("path") } == "#{tmpfile_path}"
-      insist { events[1].get("host") } == "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
-      insist { events[1].get("[@metadata][host]") } == "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+      expect(events[existing_path_index].get("path")).to eq "my_path"
+      expect(events[existing_path_index].get("host")).to eq "my_host"
+      expect(events[existing_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+
+      expect(events[added_path_index].get("path")).to eq "#{tmpfile_path}"
+      expect(events[added_path_index].get("host")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+      expect(events[added_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
    end
 
    it "should read old files" do
@@ -153,14 +154,14 @@
      events = input(conf) do |pipeline, queue|
        2.times.collect { queue.pop }
      end
-
-      insist { events[0].get("path") } == "my_path"
-      insist { events[0].get("host") } == "my_host"
-      insist { events[0].get("[@metadata][host]") } == "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
-
-      insist { events[1].get("path") } == "#{tmpfile_path}"
-      insist { events[1].get("host") } == "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
-      insist { events[1].get("[@metadata][host]") } == "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+      existing_path_index, added_path_index = "my_val" == events[0].get("my_field") ? [1,0] : [0,1]
+      expect(events[existing_path_index].get("path")).to eq "my_path"
+      expect(events[existing_path_index].get("host")).to eq "my_host"
+      expect(events[existing_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+
+      expect(events[added_path_index].get("path")).to eq "#{tmpfile_path}"
+      expect(events[added_path_index].get("host")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
+      expect(events[added_path_index].get("[@metadata][host]")).to eq "#{Socket.gethostname.force_encoding(Encoding::UTF_8)}"
    end
 
    context "when sincedb_path is an existing directory" do
@@ -207,17 +208,17 @@
        "sincedb_path" => sincedb_path,
        "stat_interval" => 0.1,
        "codec" => mlcodec,
-        "delimiter" => FILE_DELIMITER)
-      subject.register
+        "delimiter" => TEST_FILE_DELIMITER)
    end
 
    it "reads the appended data only" do
+      subject.register
      RSpec::Sequencing
-        .run_after(0.1, "assert zero events then append two lines") do
+        .run_after(0.2, "assert zero events then append two lines") do
          expect(events.size).to eq(0)
          File.open(tmpfile_path, "a") { |fd| fd.puts("hello"); fd.puts("world") }
        end
-        .then_after(0.25, "quit") do
+        .then_after(0.4, "quit") do
          subject.stop
        end
 
@@ -250,7 +251,7 @@
        "stat_interval" => 0.02,
        "codec" => codec,
        "close_older" => 0.5,
-        "delimiter" => FILE_DELIMITER)
+        "delimiter" => TEST_FILE_DELIMITER)
      subject.register
    end
 
@@ -294,7 +295,7 @@
        "stat_interval" => 0.02,
        "codec" => codec,
        "ignore_older" => 1,
-        "delimiter" => FILE_DELIMITER)
+        "delimiter" => TEST_FILE_DELIMITER)
      subject.register
 
      Thread.new { subject.run(events) }
@@ -320,7 +321,7 @@
        "sincedb_path" => sincedb_path,
        "stat_interval" => 0.05,
        "codec" => mlcodec,
-        "delimiter" => FILE_DELIMITER)
+        "delimiter" => TEST_FILE_DELIMITER)
      subject.register
    end
 
@@ -355,13 +356,13 @@
        if e1_message.start_with?('line1.1-of-z')
          expect(e1.get("path")).to match(/z.log/)
          expect(e2.get("path")).to match(/A.log/)
-          expect(e1_message).to eq("line1.1-of-z#{FILE_DELIMITER} line1.2-of-z#{FILE_DELIMITER} line1.3-of-z")
-          expect(e2_message).to eq("line1.1-of-a#{FILE_DELIMITER} line1.2-of-a#{FILE_DELIMITER} line1.3-of-a")
+          expect(e1_message).to eq("line1.1-of-z#{TEST_FILE_DELIMITER} line1.2-of-z#{TEST_FILE_DELIMITER} line1.3-of-z")
+          expect(e2_message).to eq("line1.1-of-a#{TEST_FILE_DELIMITER} line1.2-of-a#{TEST_FILE_DELIMITER} line1.3-of-a")
        else
          expect(e1.get("path")).to match(/A.log/)
          expect(e2.get("path")).to match(/z.log/)
-          expect(e1_message).to eq("line1.1-of-a#{FILE_DELIMITER} line1.2-of-a#{FILE_DELIMITER} line1.3-of-a")
-          expect(e2_message).to eq("line1.1-of-z#{FILE_DELIMITER} line1.2-of-z#{FILE_DELIMITER} line1.3-of-z")
+          expect(e1_message).to eq("line1.1-of-a#{TEST_FILE_DELIMITER} line1.2-of-a#{TEST_FILE_DELIMITER} line1.3-of-a")
+          expect(e2_message).to eq("line1.1-of-z#{TEST_FILE_DELIMITER} line1.2-of-z#{TEST_FILE_DELIMITER} line1.3-of-z")
        end
      end
      subject.run(events)
@@ -385,7 +386,7 @@
      e1 = events.first
      e1_message = e1.get("message")
      expect(e1["path"]).to match(/a.log/)
-      expect(e1_message).to eq("line1.1-of-a#{FILE_DELIMITER} line1.2-of-a#{FILE_DELIMITER} line1.3-of-a")
+      expect(e1_message).to eq("line1.1-of-a#{TEST_FILE_DELIMITER} line1.2-of-a#{TEST_FILE_DELIMITER} line1.3-of-a")
    end
    .then("stop") do
      subject.stop
@@ -400,7 +401,6 @@
  context "when #run is called multiple times", :unix => true do
    let(:file_path) { "#{tmpdir_path}/a.log" }
    let(:buffer) { [] }
-    let(:lsof) { [] }
    let(:run_thread_proc) do
      lambda { Thread.new { subject.run(buffer) } }
    end
@@ -424,17 +424,20 @@
      end
    end
 
-    it "should only have one set of files open" do
+    it "should only actually open files when content changes are detected" do
      subject.register
      expect(lsof_proc.call).to eq("")
+      # first run processes the file and records sincedb progress
      run_thread_proc.call
-      sleep 0.25
-      first_lsof = lsof_proc.call
-      expect(first_lsof.scan(file_path).size).to eq(1)
+      wait(1).for{lsof_proc.call.scan(file_path).size}.to eq(1)
+      # second run quits the first run
+      # sees the file has not changed size and does not open it
      run_thread_proc.call
-      sleep 0.25
-      second_lsof = lsof_proc.call
-      expect(second_lsof.scan(file_path).size).to eq(1)
+      wait(1).for{lsof_proc.call.scan(file_path).size}.to eq(0)
+      # truncate and write less than before
+      File.open(file_path, "w"){ |fd| fd.puts('baz'); fd.fsync }
+      # sees the file has changed size and does open it
+      wait(1).for{lsof_proc.call.scan(file_path).size}.to eq(1)
    end
  end
 
@@ -463,7 +466,7 @@
      "stat_interval" => 0.1,
      "max_open_files" => 1,
      "start_position" => "beginning",
-      "delimiter" => FILE_DELIMITER)
+      "delimiter" => TEST_FILE_DELIMITER)
    subject.register
  end
  it "collects line events from only one file" do
@@ -502,7 +505,7 @@
      "max_open_files" => 1,
      "close_older" => 0.5,
      "start_position" => "beginning",
-      "delimiter" => FILE_DELIMITER)
+      "delimiter" => TEST_FILE_DELIMITER)
    subject.register
  end
 
diff --git a/src/main/java/JrubyFileWatchService.java b/src/main/java/JrubyFileWatchService.java
new file mode 100644
index 0000000..969147c
--- /dev/null
+++ b/src/main/java/JrubyFileWatchService.java
@@ -0,0 +1,11 @@
+import org.jruby.Ruby;
+import org.jruby.runtime.load.BasicLibraryService;
+import org.logstash.filewatch.JrubyFileWatchLibrary;
+
+public class JrubyFileWatchService implements BasicLibraryService {
+    @Override
+    public final boolean basicLoad(final Ruby runtime) {
+        new JrubyFileWatchLibrary().load(runtime, false);
+        return true;
+    }
+}
diff --git a/src/main/java/org/logstash/filewatch/JrubyFileWatchLibrary.java b/src/main/java/org/logstash/filewatch/JrubyFileWatchLibrary.java
new file mode 100644
index 0000000..7a859a1
--- /dev/null
+++ b/src/main/java/org/logstash/filewatch/JrubyFileWatchLibrary.java
@@ -0,0 +1,191 @@
+package org.logstash.filewatch;
+
+/**
+ * Created with IntelliJ IDEA. User: efrey Date: 6/11/13 Time: 11:00 AM To
+ * change this template use File | Settings | File Templates.
+ *
+ * http://bugs.sun.com/view_bug.do?bug_id=6357433
+ * [Guy] modified original to be a proper JRuby class
+ * [Guy] do we need this anymore? JRuby 1.7+ uses new Java 7 File API
+ *
+ * fnv code extracted and modified from https://github.com/jakedouglas/fnv-java
+ */
+
+import org.jruby.Ruby;
+import org.jruby.RubyBignum;
+import org.jruby.RubyClass;
+import org.jruby.RubyFixnum;
+import org.jruby.RubyIO;
+import org.jruby.RubyInteger;
+import org.jruby.RubyModule;
+import org.jruby.RubyObject;
+import org.jruby.RubyString;
+import org.jruby.anno.JRubyClass;
+import org.jruby.anno.JRubyMethod;
+import org.jruby.runtime.Arity;
+import org.jruby.runtime.ThreadContext;
+import org.jruby.runtime.builtin.IRubyObject;
+import org.jruby.runtime.load.Library;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.channels.Channel;
+import java.nio.channels.FileChannel;
+import java.nio.file.FileSystems;
+import java.nio.file.OpenOption;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+
+@SuppressWarnings("ClassUnconnectedToPackage")
+public class JrubyFileWatchLibrary implements Library {
+
+    private static final BigInteger INIT32 = new BigInteger("811c9dc5", 16);
+    private static final BigInteger INIT64 = new BigInteger("cbf29ce484222325", 16);
+    private static final BigInteger PRIME32 = new BigInteger("01000193", 16);
+    private static final BigInteger PRIME64 = new BigInteger("100000001b3", 16);
+    private static final BigInteger MOD32 = new BigInteger("2").pow(32);
+    private static final BigInteger MOD64 = new BigInteger("2").pow(64);
+
+    @Override
+    public final void load(final Ruby runtime, final boolean wrap) {
+        final RubyModule module = runtime.defineModule("FileWatch");
+
+        RubyClass clazz = runtime.defineClassUnder("FileExt", runtime.getObject(), JrubyFileWatchLibrary.RubyFileExt::new, module);
+        clazz.defineAnnotatedMethods(JrubyFileWatchLibrary.RubyFileExt.class);
+
+        clazz = runtime.defineClassUnder("Fnv", runtime.getObject(), JrubyFileWatchLibrary.Fnv::new, module);
+        clazz.defineAnnotatedMethods(JrubyFileWatchLibrary.Fnv.class);
+
+    }
+
+    @JRubyClass(name = "FileExt", parent = "Object")
+    public static class RubyFileExt extends RubyObject {
+
+        public RubyFileExt(final Ruby runtime, final RubyClass metaClass) {
+            super(runtime, metaClass);
+        }
+
+        public RubyFileExt(final RubyClass metaClass) {
+            super(metaClass);
+        }
+
+        @JRubyMethod(name = "open", required = 1, meta = true)
+        public static RubyIO open(final ThreadContext context, final IRubyObject self, final RubyString path) throws IOException {
+            final Path javapath = FileSystems.getDefault().getPath(path.asJavaString());
+            final OpenOption[] options = new OpenOption[1];
+            options[0] = StandardOpenOption.READ;
+            final Channel channel = FileChannel.open(javapath, options);
+            // use the calling thread's runtime rather than Ruby.getGlobalRuntime()
+            return new RubyIO(context.runtime, channel);
+        }
+    }
+
+    // This class may be used by fingerprinting in the future
+    @SuppressWarnings({"NewMethodNamingConvention", "ChainOfInstanceofChecks"})
+    @JRubyClass(name = "Fnv", parent = "Object")
+    public static class Fnv extends RubyObject {
+
+        private byte[] bytes;
+        private long size;
+        private boolean open;
+
+        public Fnv(final Ruby runtime, final RubyClass metaClass) {
+            super(runtime, metaClass);
+        }
+
+        public Fnv(final RubyClass metaClass) {
+            super(metaClass);
+        }
+
+        @JRubyMethod(name = "coerce_bignum", meta = true, required = 1)
+        public static IRubyObject coerceBignum(final ThreadContext ctx, final IRubyObject recv, final IRubyObject i) {
+            if (i instanceof RubyBignum) {
+                return i;
+            }
+            if (i instanceof RubyFixnum) {
+                return RubyBignum.newBignum(ctx.runtime, ((RubyFixnum)i).getBigIntegerValue());
+            }
+            throw ctx.runtime.newRaiseException(ctx.runtime.getClass("StandardError"), "Can't coerce");
+        }
+
+        // def initialize(data)
+        @JRubyMethod(name = "initialize", required = 1)
+        public IRubyObject rubyInitialize(final ThreadContext ctx, final RubyString data) {
+            bytes = data.getBytes();
+            size = (long) bytes.length;
+            open = true;
+            return ctx.nil;
+        }
+
+        @JRubyMethod(name = "close")
+        public IRubyObject close(final ThreadContext ctx) {
+            open = false;
+            bytes = null;
+            return ctx.nil;
+        }
+
+        @JRubyMethod(name = "open?")
+        public IRubyObject open_p(final ThreadContext ctx) {
+            if(open) {
+                return ctx.runtime.getTrue();
+            }
+            return ctx.runtime.getFalse();
+        }
+
+        @JRubyMethod(name = "closed?")
+        public IRubyObject closed_p(final ThreadContext ctx) {
+            if(open) {
+                return ctx.runtime.getFalse();
+            }
+            return ctx.runtime.getTrue();
+        }
+
+        @JRubyMethod(name = "fnv1a32", optional = 1)
+        public IRubyObject fnv1a_32(final ThreadContext ctx, IRubyObject[] args) {
+            IRubyObject[] args1 = args;
+            if(open) {
+                args1 = Arity.scanArgs(ctx.runtime, args1, 0, 1);
+                return RubyBignum.newBignum(ctx.runtime, common_fnv(args1[0], INIT32, PRIME32, MOD32));
+            }
+            throw ctx.runtime.newRaiseException(ctx.runtime.getClass("StandardError"), "Fnv instance is closed!");
+        }
+
+        @JRubyMethod(name = "fnv1a64", optional = 1)
+        public IRubyObject fnv1a_64(final ThreadContext ctx, IRubyObject[] args) {
+            IRubyObject[] args1 = args;
+            if(open) {
+                args1 = Arity.scanArgs(ctx.runtime, args1, 0, 1);
+                return RubyBignum.newBignum(ctx.runtime, common_fnv(args1[0], INIT64, PRIME64, MOD64));
+            }
+            throw ctx.runtime.newRaiseException(ctx.runtime.getClass("StandardError"), "Fnv instance is closed!");
+        }
+
+        private long convertLong(final IRubyObject obj) {
+            if(obj instanceof RubyInteger) {
+                return ((RubyInteger)obj).getLongValue();
+            }
+            if(obj instanceof RubyFixnum) {
+                return ((RubyFixnum)obj).getLongValue();
+            }
+
+            return size;
+        }
+
+        private BigInteger common_fnv(final IRubyObject len, final BigInteger hash, final BigInteger prime, final BigInteger mod) {
+            long converted = convertLong(len);
+
+            if (converted > size) {
+                converted = size;
+            }
+
+            BigInteger tempHash = hash;
+            for (int idx = 0; (long) idx < converted; idx++) {
+                tempHash = tempHash.xor(BigInteger.valueOf((long) ((int) bytes[idx] & 0xff)));
+                tempHash = tempHash.multiply(prime).mod(mod);
+            }
+
+            return tempHash;
+        }
+    }
+
+}
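+
+/* Reviewer note, not part of the original patch: a minimal usage sketch of
+ * the Fnv class from the Ruby side, assuming the compiled jar is on the load
+ * path and the classes were registered via basicLoad as above. fnv1a32 and
+ * fnv1a64 take an optional byte-length argument and default to hashing all
+ * of the data; this is plain FNV-1a, h = ((h ^ byte) * prime) mod 2**n:
+ *
+ *   fnv = FileWatch::Fnv.new("some file content")
+ *   fnv.fnv1a32      # over all bytes, prime 0x01000193, mod 2**32
+ *   fnv.fnv1a64(16)  # over the first 16 bytes, prime 0x100000001b3, mod 2**64
+ *   fnv.close        # further fnv1a* calls raise StandardError
+ */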