class Fluent::Plugin::Buffer

Constants

BufferQueueLimitError
DEFAULT_CHUNK_FULL_THRESHOLD
DEFAULT_CHUNK_LIMIT_SIZE
DEFAULT_TOTAL_LIMIT_SIZE
MINIMUM_APPEND_ATTEMPT_RECORDS
Metadata
STATS_KEYS

Attributes

available_buffer_space_ratios_metrics[R]
dequeued[R] (for tests)
newest_timekey_metrics[R]
oldest_timekey_metrics[R]
queue[R] (for tests)
queue_length_metrics[R] (for metrics)
queue_size_metrics[R] (for metrics)
queued_num[R] (for tests)
stage[R] (for tests)
stage_length_metrics[R] (for metrics)
stage_size_metrics[R] (for metrics)
total_queued_size_metrics[R]

Public Class Methods

new()
Calls superclass method Fluent::PluginId::new
# File lib/fluent/plugin/buffer.rb, line 169
def initialize
  super

  @chunk_limit_size = nil
  @total_limit_size = nil
  @queue_limit_length = nil
  @chunk_limit_records = nil

  @stage = {}    #=> Hash (metadata -> chunk) : not flushed yet
  @queue = []    #=> Array (chunks)           : already flushed (not written)
  @dequeued = {} #=> Hash (unique_id -> chunk): already written (not purged)
  @queued_num = {} # metadata => int (number of queued chunks)
  @dequeued_num = {} # metadata => int (number of dequeued chunks)

  @stage_length_metrics = nil
  @stage_size_metrics = nil
  @queue_length_metrics = nil
  @queue_size_metrics = nil
  @available_buffer_space_ratios_metrics = nil
  @total_queued_size_metrics = nil
  @newest_timekey_metrics = nil
  @oldest_timekey_metrics = nil
  @timekeys = Hash.new(0)
  @enable_update_timekeys = false
  @mutex = Mutex.new
end

Public Instance Methods

chunk_size_full?(chunk)
# File lib/fluent/plugin/buffer.rb, line 638
def chunk_size_full?(chunk)
  chunk.bytesize >= @chunk_limit_size * @chunk_full_threshold || (@chunk_limit_records && chunk.size >= @chunk_limit_records * @chunk_full_threshold)
end
chunk_size_over?(chunk)
# File lib/fluent/plugin/buffer.rb, line 634
def chunk_size_over?(chunk)
  chunk.bytesize > @chunk_limit_size || (@chunk_limit_records && chunk.size > @chunk_limit_records)
end
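These two predicates differ in strictness: chunk_size_over? tests the hard limits, while chunk_size_full? applies @chunk_full_threshold so a chunk is reported "full" slightly before it could overflow. A minimal arithmetic sketch, assuming an 8 MiB chunk_limit_size and a threshold of 0.95 (the assumed default, DEFAULT_CHUNK_FULL_THRESHOLD):

  chunk_limit_size = 8 * 1024 * 1024  # 8 MiB
  chunk_full_threshold = 0.95         # assumed default value

  bytesize = 8_000_000
  bytesize >= chunk_limit_size * chunk_full_threshold #=> true:  "full", ready to be enqueued
  bytesize > chunk_limit_size                         #=> false: not "over", the write is kept
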
clear_queue!()
# File lib/fluent/plugin/buffer.rb, line 616
def clear_queue!
  log.on_trace { log.trace "clearing queue", instance: self.object_id }

  synchronize do
    until @queue.empty?
      begin
        q = @queue.shift
        log.trace("purging a chunk in queue"){ {id: dump_unique_id_hex(chunk.unique_id), bytesize: chunk.bytesize, size: chunk.size} }
        q.purge
      rescue => e
        log.error "unexpected error while clearing buffer queue", error_class: e.class, error: e
        log.error_backtrace
      end
    end
    @queue_size_metrics.set(0)
  end
end
close()
Calls superclass method Fluent::Plugin::Base#close
# File lib/fluent/plugin/buffer.rb, line 266
def close
  super
  synchronize do
    log.debug "closing buffer", instance: self.object_id
    @dequeued.each_pair do |chunk_id, chunk|
      chunk.close
    end
    until @queue.empty?
      @queue.shift.close
    end
    @stage.each_pair do |metadata, chunk|
      chunk.close
    end
  end
end
configure(conf)
Calls superclass method Fluent::PluginId#configure
# File lib/fluent/plugin/buffer.rb, line 216
def configure(conf)
  super

  unless @queue_limit_length.nil?
    @total_limit_size = @chunk_limit_size * @queue_limit_length
  end
  @stage_length_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "stage_length",
                                         help_text: 'Length of stage buffers', prefer_gauge: true)
  @stage_length_metrics.set(0)
  @stage_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "stage_byte_size",
                                       help_text: 'Total size of stage buffers', prefer_gauge: true)
  @stage_size_metrics.set(0) # Ensure zero.
  @queue_length_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "queue_length",
                                         help_text: 'Length of queue buffers', prefer_gauge: true)
  @queue_length_metrics.set(0)
  @queue_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "queue_byte_size",
                                       help_text: 'Total size of queue buffers', prefer_gauge: true)
  @queue_size_metrics.set(0) # Ensure zero.
  @available_buffer_space_ratios_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "available_buffer_space_ratios",
                                                          help_text: 'Ratio of available space in buffer', prefer_gauge: true)
  @available_buffer_space_ratios_metrics.set(100) # Default is 100%.
  @total_queued_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "total_queued_size",
                                              help_text: 'Total size of stage and queue buffers', prefer_gauge: true)
  @total_queued_size_metrics.set(0)
  @newest_timekey_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "newest_timekey",
                                           help_text: 'Newest timekey in buffer', prefer_gauge: true)
  @oldest_timekey_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "oldest_timekey",
                                           help_text: 'Oldest timekey in buffer', prefer_gauge: true)
end
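Note the queue_limit_length branch at the top: it is a backward-compatibility shim that derives total_limit_size from the deprecated parameter. A sketch of the arithmetic, with assumed values:

  # Assumed values, for illustration only.
  chunk_limit_size   = 8 * 1024 * 1024  # 8 MiB
  queue_limit_length = 32               # deprecated parameter
  total_limit_size   = chunk_limit_size * queue_limit_length #=> 268435456 (256 MiB)
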
dequeue_chunk()
# File lib/fluent/plugin/buffer.rb, line 554
def dequeue_chunk
  return nil if @queue.empty?
  log.on_trace { log.trace "dequeueing a chunk", instance: self.object_id }

  synchronize do
    chunk = @queue.shift

    # the chunk may have been dequeued by another thread just before "synchronize" in this thread
    return nil unless chunk

    @dequeued[chunk.unique_id] = chunk
    @queued_num[chunk.metadata] -= 1 # BUG if nil, 0 or subzero
    @dequeued_num[chunk.metadata] ||= 0
    @dequeued_num[chunk.metadata] += 1
    log.trace "chunk dequeued", instance: self.object_id, metadata: chunk.metadata
    chunk
  end
end
enable_update_timekeys()
# File lib/fluent/plugin/buffer.rb, line 246
def enable_update_timekeys
  @enable_update_timekeys = true
end
enqueue_all(force_enqueue = false) { |metadata, chunk| ... }

At flush_at_shutdown, all staged chunks should be enqueued for buffer flush. Pass true as force_enqueue to do so.

# File lib/fluent/plugin/buffer.rb, line 532
def enqueue_all(force_enqueue = false)
  log.on_trace { log.trace "enqueueing all chunks in buffer", instance: self.object_id }
  update_timekeys if @enable_update_timekeys

  if block_given?
    synchronize{ @stage.keys }.each do |metadata|
      return if !force_enqueue && queue_full?
      # NOTE: The following line might cause data race depending on Ruby implementations except CRuby
      # cf. https://github.com/fluent/fluentd/pull/1721#discussion_r146170251
      chunk = @stage[metadata]
      next unless chunk
      v = yield metadata, chunk
      enqueue_chunk(metadata) if v
    end
  else
    synchronize{ @stage.keys }.each do |metadata|
      return if !force_enqueue && queue_full?
      enqueue_chunk(metadata)
    end
  end
end
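A hedged usage sketch: when a block is given, a staged chunk is enqueued only if the block returns a truthy value. The chunk timestamp accessor below is an assumption for illustration.

  # Sketch: enqueue staged chunks untouched for 60 seconds or more.
  buffer.enqueue_all(false) do |metadata, chunk|
    Time.now - chunk.modified_at >= 60  # modified_at is assumed to be readable here
  end

  # At flush_at_shutdown, force enqueueing even when the queue is full:
  buffer.enqueue_all(true)
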
enqueue_chunk(metadata)
# File lib/fluent/plugin/buffer.rb, line 477
def enqueue_chunk(metadata)
  log.on_trace { log.trace "enqueueing chunk", instance: self.object_id, metadata: metadata }

  chunk = synchronize do
    @stage.delete(metadata)
  end
  return nil unless chunk

  chunk.synchronize do
    synchronize do
      if chunk.empty?
        chunk.close
      else
        chunk.metadata.seq = 0 # metadata.seq should be 0 for counting @queued_num
        @queue << chunk
        @queued_num[metadata] = @queued_num.fetch(metadata, 0) + 1
        chunk.enqueued!
      end
      bytesize = chunk.bytesize
      @stage_size_metrics.sub(bytesize)
      @queue_size_metrics.add(bytesize)
    end
  end
  nil
end
enqueue_unstaged_chunk(chunk)
# File lib/fluent/plugin/buffer.rb, line 503
def enqueue_unstaged_chunk(chunk)
  log.on_trace { log.trace "enqueueing unstaged chunk", instance: self.object_id, metadata: chunk.metadata }

  synchronize do
    chunk.synchronize do
      metadata = chunk.metadata
      metadata.seq = 0 # metadata.seq should be 0 for counting @queued_num
      @queue << chunk
      @queued_num[metadata] = @queued_num.fetch(metadata, 0) + 1
      chunk.enqueued!
    end
    @queue_size_metrics.add(chunk.bytesize)
  end
end
generate_chunk(metadata)
# File lib/fluent/plugin/buffer.rb, line 305
def generate_chunk(metadata)
  raise NotImplementedError, "Implement this method in child class"
end
metadata(timekey: nil, tag: nil, variables: nil)

Keep this method for existing code

# File lib/fluent/plugin/buffer.rb, line 314
def metadata(timekey: nil, tag: nil, variables: nil)
  Metadata.new(timekey, tag, variables)
end
new_metadata(timekey: nil, tag: nil, variables: nil)
# File lib/fluent/plugin/buffer.rb, line 309
def new_metadata(timekey: nil, tag: nil, variables: nil)
  Metadata.new(timekey, tag, variables)
end
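Both helpers build the same Metadata value; metadata() survives only for backward compatibility with existing code. A minimal sketch of constructing the key that partitions @stage:

  # Sketch: a tag- and timekey-partitioned metadata key (values illustrative).
  m = buffer.metadata(timekey: 1_700_000_000, tag: 'app.access', variables: nil)
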
persistent?()
# File lib/fluent/plugin/buffer.rb, line 212
def persistent?
  false
end
purge_chunk(chunk_id)
# File lib/fluent/plugin/buffer.rb, line 587
def purge_chunk(chunk_id)
  metadata = nil
  synchronize do
    chunk = @dequeued.delete(chunk_id)
    return nil unless chunk # purged by other threads

    metadata = chunk.metadata
    log.on_trace { log.trace "purging a chunk", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata }

    begin
      bytesize = chunk.bytesize
      chunk.purge
      @queue_size_metrics.sub(bytesize)
    rescue => e
      log.error "failed to purge buffer chunk", chunk_id: dump_unique_id_hex(chunk_id), error_class: e.class, error: e
      log.error_backtrace
    end

    @dequeued_num[chunk.metadata] -= 1
    if metadata && !@stage[metadata] && (!@queued_num[metadata] || @queued_num[metadata] < 1) && @dequeued_num[metadata].zero?
      @queued_num.delete(metadata)
      @dequeued_num.delete(metadata)
    end
    log.trace "chunk purged", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata
  end

  nil
end
queue_full?()
# File lib/fluent/plugin/buffer.rb, line 459
def queue_full?
  synchronize { @queue.size } >= @queued_chunks_limit_size
end
queue_size()
# File lib/fluent/plugin/buffer.rb, line 204
def queue_size
  @queue_size_metrics.get
end
queue_size=(value)
# File lib/fluent/plugin/buffer.rb, line 208
def queue_size=(value)
  @queue_size_metrics.set(value)
end
queued?(metadata = nil, optimistic: false)
# File lib/fluent/plugin/buffer.rb, line 467
def queued?(metadata = nil, optimistic: false)
  if optimistic
    optimistic_queued?(metadata)
  else
    synchronize do
      optimistic_queued?(metadata)
    end
  end
end
queued_records()
# File lib/fluent/plugin/buffer.rb, line 463
def queued_records
  synchronize { @queue.reduce(0){|r, chunk| r + chunk.size } }
end
resume()
TODO: for back pressure feature

  def used?(ratio)
    @total_limit_size * ratio > @stage_size_metrics.get + @queue_size_metrics.get
  end

# File lib/fluent/plugin/buffer.rb, line 300
def resume
  # return {}, []
  raise NotImplementedError, "Implement this method in child class"
end
stage_size()
# File lib/fluent/plugin/buffer.rb, line 196
def stage_size
  @stage_size_metrics.get
end
stage_size=(value)
# File lib/fluent/plugin/buffer.rb, line 200
def stage_size=(value)
  @stage_size_metrics.set(value)
end
start()
Calls superclass method Fluent::Plugin::Base#start
# File lib/fluent/plugin/buffer.rb, line 250
def start
  super

  @stage, @queue = resume
  @stage.each_pair do |metadata, chunk|
    @stage_size_metrics.add(chunk.bytesize)
  end
  @queue.each do |chunk|
    @queued_num[chunk.metadata] ||= 0
    @queued_num[chunk.metadata] += 1
    @queue_size_metrics.add(chunk.bytesize)
  end
  update_timekeys
  log.debug "buffer started", instance: self.object_id, stage_size: @stage_size_metrics.get, queue_size: @queue_size_metrics.get
end
statistics()
# File lib/fluent/plugin/buffer.rb, line 877
def statistics
  stage_size, queue_size = @stage_size_metrics.get, @queue_size_metrics.get
  buffer_space = 1.0 - ((stage_size + queue_size * 1.0) / @total_limit_size)
  @stage_length_metrics.set(@stage.size)
  @queue_length_metrics.set(@queue.size)
  @available_buffer_space_ratios_metrics.set(buffer_space * 100)
  @total_queued_size_metrics.set(stage_size + queue_size)
  stats = {
    'stage_length' => @stage_length_metrics.get,
    'stage_byte_size' => stage_size,
    'queue_length' => @queue_length_metrics.get,
    'queue_byte_size' => queue_size,
    'available_buffer_space_ratios' => @available_buffer_space_ratios_metrics.get.round(1),
    'total_queued_size' => @total_queued_size_metrics.get,
  }

  tkeys = timekeys
  if (m = tkeys.min)
    @oldest_timekey_metrics.set(m)
    stats['oldest_timekey'] = @oldest_timekey_metrics.get
  end
  if (m = tkeys.max)
    @newest_timekey_metrics.set(m)
    stats['newest_timekey'] = @newest_timekey_metrics.get
  end

  { 'buffer' => stats }
end
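The keys mirror the metric names registered in configure. An illustrative return value (numbers are made up; the timekey entries appear only when timekeys exist):

  # {
  #   'buffer' => {
  #     'stage_length' => 2,
  #     'stage_byte_size' => 120_000,
  #     'queue_length' => 3,
  #     'queue_byte_size' => 450_000,
  #     'available_buffer_space_ratios' => 99.9,
  #     'total_queued_size' => 570_000,
  #     'oldest_timekey' => 1_700_000_000,
  #     'newest_timekey' => 1_700_003_600
  #   }
  # }
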
storable?()
# File lib/fluent/plugin/buffer.rb, line 291
def storable?
  @total_limit_size > @stage_size_metrics.get + @queue_size_metrics.get
end
takeback_chunk(chunk_id)
# File lib/fluent/plugin/buffer.rb, line 573
def takeback_chunk(chunk_id)
  log.on_trace { log.trace "taking back a chunk", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id) }

  synchronize do
    chunk = @dequeued.delete(chunk_id)
    return false unless chunk # already purged by other thread
    @queue.unshift(chunk)
    log.trace "chunk taken back", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: chunk.metadata
    @queued_num[chunk.metadata] += 1 # BUG if nil
    @dequeued_num[chunk.metadata] -= 1
  end
  true
end
terminate()
Calls superclass method Fluent::Plugin::Base#terminate
# File lib/fluent/plugin/buffer.rb, line 282
def terminate
  super
  @dequeued = @stage = @queue = @queued_num = nil
  @stage_length_metrics = @stage_size_metrics = @queue_length_metrics = @queue_size_metrics = nil
  @available_buffer_space_ratios_metrics = @total_queued_size_metrics = nil
  @newest_timekey_metrics = @oldest_timekey_metrics = nil
  @timekeys.clear
end
timekeys()
# File lib/fluent/plugin/buffer.rb, line 318
def timekeys
  @timekeys.keys
end
update_timekeys()
# File lib/fluent/plugin/buffer.rb, line 518
def update_timekeys
  synchronize do
    chunks = @stage.values
    chunks.concat(@queue)
    @timekeys = chunks.each_with_object({}) do |chunk, keys|
      if chunk.metadata && chunk.metadata.timekey
        t = chunk.metadata.timekey
        keys[t] = keys.fetch(t, 0) + 1
      end
    end
  end
end
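@timekeys ends up counting chunks per timekey across stage and queue, and timekeys() above exposes just the keys. A short sketch of how statistics() consumes the result:

  buffer.update_timekeys
  keys = buffer.timekeys  #=> e.g. [1_700_000_000, 1_700_003_600] (illustrative)
  keys.min                # oldest timekey, reported by statistics()
  keys.max                # newest timekey
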
write(metadata_and_data, format: nil, size: nil, enqueue: false)

metadata MUST have a consistent object_id for each variation.
data MUST be an Array of serialized events, or an EventStream.
metadata_and_data MUST be a Hash of { metadata => data }.

# File lib/fluent/plugin/buffer.rb, line 325
def write(metadata_and_data, format: nil, size: nil, enqueue: false)
  return if metadata_and_data.size < 1
  raise BufferOverflowError, "buffer space has too many data" unless storable?

  log.on_trace { log.trace "writing events into buffer", instance: self.object_id, metadata_size: metadata_and_data.size }

  operated_chunks = []
  unstaged_chunks = {} # metadata => [chunk, chunk, ...]
  chunks_to_enqueue = []
  staged_bytesizes_by_chunk = {}
  # track internal BufferChunkOverflowError in write_step_by_step
  buffer_chunk_overflow_errors = []

  begin
    # sort metadata so that chunk locks are acquired in the same order across threads
    metadata_and_data.keys.sort.each do |metadata|
      data = metadata_and_data[metadata]
      write_once(metadata, data, format: format, size: size) do |chunk, adding_bytesize, error|
        chunk.mon_enter # add lock to prevent to be committed/rollbacked from other threads
        operated_chunks << chunk
        if chunk.staged?
          #
          # https://github.com/fluent/fluentd/issues/2712
          # write_once is supposed to write to a chunk only once,
          # but this block **may** run multiple times from write_step_by_step, and a previous write may have been rolled back.
          # So the stage_size should be counted only for the last successful write.
          #
          staged_bytesizes_by_chunk[chunk] = adding_bytesize
        elsif chunk.unstaged?
          unstaged_chunks[metadata] ||= []
          unstaged_chunks[metadata] << chunk
        end
        if error && !error.empty?
          buffer_chunk_overflow_errors << error
        end
      end
    end

    return if operated_chunks.empty?

    # At this point, this thread holds locks on many chunks, so taking the buffer-global lock could deadlock.
    # Any operation that needs the buffer-global lock (including enqueueing) must be done after these locks are released.

    first_chunk = operated_chunks.shift
    # If the first commit operation finishes without exceptions, the following commits
    # for the other chunks can be expected to finish successfully as well.
    # In most cases #commit requires very little disk space, so the major failure causes are
    # permission errors, disk failures and other permanent (fatal) errors.
    begin
      first_chunk.commit
      if enqueue || first_chunk.unstaged? || chunk_size_full?(first_chunk)
        chunks_to_enqueue << first_chunk
      end
      first_chunk.mon_exit
    rescue
      operated_chunks.unshift(first_chunk)
      raise
    end

    errors = []
    # The buffer plugin assumes there is no serious error cause: it will commit all chunks either way
    operated_chunks.each do |chunk|
      begin
        chunk.commit
        if enqueue || chunk.unstaged? || chunk_size_full?(chunk)
          chunks_to_enqueue << chunk
        end
        chunk.mon_exit
      rescue => e
        chunk.rollback
        chunk.mon_exit
        errors << e
      end
    end

    # All locks about chunks are released.

    #
    # Now update the stage, stage_size with proper locking
    # FIX FOR stage_size miscomputation - https://github.com/fluent/fluentd/issues/2712
    #
    staged_bytesizes_by_chunk.each do |chunk, bytesize|
      chunk.synchronize do
        synchronize { @stage_size_metrics.add(bytesize) }
        log.on_trace { log.trace { "chunk #{chunk.path} size_added: #{bytesize} new_size: #{chunk.bytesize}" } }
      end
    end

    chunks_to_enqueue.each do |c|
      if c.staged? && (enqueue || chunk_size_full?(c))
        m = c.metadata
        enqueue_chunk(m)
        if unstaged_chunks[m]
          u = unstaged_chunks[m].pop
          u.synchronize do
            if u.unstaged? && !chunk_size_full?(u)
              # `u.metadata.seq` and `m.seq` can be different but Buffer#enqueue_chunk expect them to be the same value
              u.metadata.seq = 0
              synchronize {
                @stage[m] = u.staged!
                @stage_size_metrics.add(u.bytesize)
              }
            end
          end
        end
      elsif c.unstaged?
        enqueue_unstaged_chunk(c)
      else
        # previously staged chunk is already enqueued, closed or purged.
        # no problem.
      end
    end

    operated_chunks.clear if errors.empty?

    if errors.size > 0
      log.warn "error occurs in committing chunks: only first one raised", errors: errors.map(&:class)
      raise errors.first
    end
  ensure
    operated_chunks.each do |chunk|
      chunk.rollback rescue nil # nothing possible to do for #rollback failure
      if chunk.unstaged?
        chunk.purge rescue nil # to prevent leakage of unstaged chunks
      end
      chunk.mon_exit rescue nil # this may raise ThreadError for chunks already committed
    end
    unless buffer_chunk_overflow_errors.empty?
      # Notify delayed BufferChunkOverflowError here
      raise BufferChunkOverflowError, buffer_chunk_overflow_errors.join(", ")
    end
  end
end
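A hedged caller sketch (in practice, buffered output plugins drive this through their emit path); the formatter proc and payload below are illustrative assumptions:

  # Sketch: stage one event stream under a tag-partitioned metadata key.
  es = Fluent::ArrayEventStream.new([[Fluent::EventTime.now, {'message' => 'hello'}]])
  m  = buffer.metadata(tag: 'app.access')

  format = ->(data) { data.to_msgpack_stream }  # assumed formatter for this sketch
  buffer.write({m => es}, format: format, size: -> { es.size }, enqueue: false)
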
write_once(metadata, data, format: nil, size: nil, &block)

write once into a chunk

  1. append whole data into existing chunk

  2. commit it & return unless chunk_size_over?

  3. enqueue existing chunk & retry whole method if chunk was not empty

  4. go to step_by_step writing

# File lib/fluent/plugin/buffer.rb, line 650
def write_once(metadata, data, format: nil, size: nil, &block)
  return if data.empty?

  stored = false
  adding_bytesize = nil

  chunk = synchronize { @stage[metadata] ||= generate_chunk(metadata).staged! }
  enqueue_chunk_before_retry = false
  chunk.synchronize do
    # retry this method if chunk is already queued (between getting chunk and entering critical section)
    raise ShouldRetry unless chunk.staged?

    empty_chunk = chunk.empty?

    original_bytesize = chunk.bytesize
    begin
      if format
        serialized = format.call(data)
        chunk.concat(serialized, size ? size.call : data.size)
      else
        chunk.append(data, compress: @compress)
      end
      adding_bytesize = chunk.bytesize - original_bytesize

      if chunk_size_over?(chunk)
        if format && empty_chunk
          if chunk.bytesize > @chunk_limit_size
            log.warn "chunk bytes limit exceeds for an emitted event stream: #{adding_bytesize}bytes"
          else
            log.warn "chunk size limit exceeds for an emitted event stream: #{chunk.size}records"
          end
        end
        chunk.rollback

        if format && !empty_chunk
          # Event streams should be appended into a chunk at once
          # as far as possible, to improve performance of formatting.
          # Event stream may be a MessagePackEventStream. We don't want to split it into
          # 2 or more chunks (except for a case that the event stream is larger than chunk limit).
          enqueue_chunk_before_retry = true
          raise ShouldRetry
        end
      else
        stored = true
      end
    rescue
      chunk.rollback
      raise
    end

    if stored
      block.call(chunk, adding_bytesize)
    end
  end

  unless stored
    # try step-by-step appending if data can't be stored into an existing chunk in non-bulk mode
    #
    # 1/10 size of original event stream (splits_count == 10) seems enough small
    # to try emitting events into existing chunk.
    # it does not matter to split event stream into very small splits, because chunks have less
    # overhead to write data many times (even about file buffer chunks).
    write_step_by_step(metadata, data, format, 10, &block)
  end
rescue ShouldRetry
  enqueue_chunk(metadata) if enqueue_chunk_before_retry
  retry
end
write_step_by_step(metadata, data, format, splits_count, &block)
  1. split the event stream into many (10 -> 100 -> 1000 -> …) splits

  2. append splits into the staged chunks as much as possible

  3. create unstaged chunk and append rest splits -> repeat it for all splits

# File lib/fluent/plugin/buffer.rb, line 726
def write_step_by_step(metadata, data, format, splits_count, &block)
  splits = []
  errors = []
  if splits_count > data.size
    splits_count = data.size
  end
  slice_size = if data.size % splits_count == 0
                 data.size / splits_count
               else
                 data.size / (splits_count - 1)
               end
  slice_origin = 0
  while slice_origin < data.size
    splits << data.slice(slice_origin, slice_size)
    slice_origin += slice_size
  end

  # This method will append events into the staged chunk at first.
  # Then, will generate chunks not staged (not queued) to append rest data.
  staged_chunk_used = false
  modified_chunks = []
  modified_metadata = metadata
  get_next_chunk = ->(){
    c = if staged_chunk_used
          # Staging new chunk here is bad idea:
          # Recovering whole state including newly staged chunks is much harder than current implementation.
          modified_metadata = modified_metadata.dup_next
          generate_chunk(modified_metadata)
        else
          synchronize { @stage[modified_metadata] ||= generate_chunk(modified_metadata).staged! }
        end
    modified_chunks << c
    c
  }

  writing_splits_index = 0
  enqueue_chunk_before_retry = false

  while writing_splits_index < splits.size
    chunk = get_next_chunk.call
    chunk.synchronize do
      raise ShouldRetry unless chunk.writable?
      staged_chunk_used = true if chunk.staged?

      original_bytesize = committed_bytesize = chunk.bytesize
      begin
        while writing_splits_index < splits.size
          split = splits[writing_splits_index]
          formatted_split = format ? format.call(split) : nil

          if split.size == 1 # Check BufferChunkOverflowError
            determined_bytesize = nil
            if @compress != :text
              determined_bytesize = nil
            elsif formatted_split
              determined_bytesize = formatted_split.bytesize
            elsif split.first.respond_to?(:bytesize)
              determined_bytesize = split.first.bytesize
            end

            if determined_bytesize && determined_bytesize > @chunk_limit_size
              # This is an obvious case where BufferChunkOverflowError should be raised,
              # but raising here would completely lose the already-processed splits and
              # the subsequent ones. So, as a last resort, raising the exception is delayed.
              errors << "a #{determined_bytesize} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
              writing_splits_index += 1
              next
            end

            if determined_bytesize.nil? || chunk.bytesize + determined_bytesize > @chunk_limit_size
              # This split will (or might) push the chunk over its size limit, so commit now
              # to keep the already-processed content (accepting a small performance regression).
              chunk.commit
              committed_bytesize = chunk.bytesize
            end
          end

          if format
            chunk.concat(formatted_split, split.size)
          else
            chunk.append(split, compress: @compress)
          end
          adding_bytes = chunk.bytesize - committed_bytesize

          if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over?
            chunk.rollback
            committed_bytesize = chunk.bytesize

            if split.size == 1 # Check BufferChunkOverflowError again
              if adding_bytes > @chunk_limit_size
                errors << "concatenated/appended a #{adding_bytes} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
                writing_splits_index += 1
                next
              else
                # The already-processed content is kept after rollback, so this unstaged chunk should be enqueued.
                # After that, the current split is processed again.
                # A new chunk must be allocated; to do so, remove the @stage entry and mark this chunk unstaged.
                synchronize { @stage.delete(modified_metadata) }
                staged_chunk_used = false
                chunk.unstaged!
                break
              end
            end

            if chunk_size_full?(chunk) || split.size == 1
              enqueue_chunk_before_retry = true
            else
              splits_count *= 10
            end

            raise ShouldRetry
          end

          writing_splits_index += 1

          if chunk_size_full?(chunk)
            break
          end
        end
      rescue
        chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it
        raise
      end

      block.call(chunk, chunk.bytesize - original_bytesize, errors)
      errors = []
    end
  end
rescue ShouldRetry
  modified_chunks.each do |mc|
    mc.rollback rescue nil
    if mc.unstaged?
      mc.purge rescue nil
    end
  end
  enqueue_chunk(metadata) if enqueue_chunk_before_retry
  retry
end
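The slicing arithmetic at the top of this method deserves a worked example: in the non-divisible case, dividing by splits_count - 1 makes each slice slightly larger, so the final slice absorbs the remainder.

  # Worked example with illustrative numbers:
  data_size    = 95
  splits_count = 10
  slice_size = if data_size % splits_count == 0
                 data_size / splits_count
               else
                 data_size / (splits_count - 1)  # 95 / 9 #=> 10
               end
  # Slices: nine of 10 records each, plus a final slice of 5 (95 = 9 * 10 + 5).
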

Private Instance Methods

optimistic_queued?(metadata = nil)
# File lib/fluent/plugin/buffer.rb, line 908
def optimistic_queued?(metadata = nil)
  if metadata
    n = @queued_num[metadata]
    n && n.nonzero?
  else
    !@queue.empty?
  end
end