module Gtn::PublicationTimes

Module for obtaining the original publication times of files. It walks the git history to record the time at which each file was first added, following renames so that the time is attributed to the file's most recent path. This is faster than talking to the file system.

Public Class Methods

cached_command() click to toggle source
# File _plugins/gtn/mod.rb, line 184
# Returns the git log text used to fill the publication-time cache, reusing
# an on-disk cache file when one is found by +discover_caches+.
#
# Without a cache file this is simply the output of +command+. With one, git
# is only asked for commits newer than the commit named in the cache file
# name, and the cached text is appended after that fresh output.
#
# Returns:: String in the same format as +command+.
def self.cached_command
  # Resolve the cache file once. Looking it up repeatedly is wasted work and
  # the answer could differ between calls if the file disappears meanwhile.
  cache_file = discover_caches
  return command if cache_file.nil?

  Jekyll.logger.info '[GTN/Time/Pub] Using cached publication times'

  # The commit is the text between the last '-' and the following '.' of the
  # cache file path (e.g. metadata/git-pub-<commit>.txt).
  previous_commit = cache_file.split('-').last.split('.').first
  previous = File.read(cache_file)

  `git log --first-parent --name-status --diff-filter=AR --pretty='GTN_GTN:%ct' #{previous_commit}..` + previous
end
chase_rename(renames, path, depth: 0) click to toggle source
# File _plugins/gtn/mod.rb, line 119
# Follows a chain of recorded renames to find the newest known name of +path+.
#
# renames:: Hash mapping an older path to the path it was renamed to.
# path::    The path to resolve.
# depth::   Accepted for backward compatibility with existing callers; the
#           walk tracks visited paths instead of counting hops, so the value
#           is not used.
#
# Returns:: the last path in the rename chain, or +path+ itself when it has no
#           entry in +renames+. If the renames loop back onto a path that was
#           already visited, an error is logged and the path reached just
#           before the loop closes is returned.
def self.chase_rename(renames, path, depth: 0)
  # A plain Hash is used as the visited set so no extra require is needed.
  visited = { path => true }
  current = path

  while renames.key?(current)
    following = renames[current]

    # A rename hash can contain loops (e.g. a file renamed back and forth).
    # Stop as soon as we would revisit a path instead of relying on a fixed
    # hop limit, which also truncated long but legitimate rename chains.
    if visited.key?(following)
      Jekyll.logger.error "[GTN/Time/Pub] Rename loop detected for #{path}"
      return current
    end

    visited[following] = true
    current = following
  end

  current
end
clean_path(f) click to toggle source
# File _plugins/gtn/mod.rb, line 203
# Normalises a path so it can be used as a key in the time cache.
#
# f:: The path to clean.
#
# Returns:: +f+ without its first two characters when it matches <tt>^\./</tt>;
#           +f+ with every occurrence of +ROOT_PATH+ removed when it matches
#           <tt>^/</tt>; otherwise +f+ unchanged.
def self.clean_path(f)
  # "./topics/x.md" style: drop the leading "./".
  return f[2..] if f =~ %r{^\./}

  # Absolute style: strip ROOT_PATH out of the string.
  return f.gsub(ROOT_PATH, '') if f =~ %r{^/}

  f
end
command() click to toggle source
# File _plugins/gtn/mod.rb, line 195
# Runs +git log+ over the history and returns its raw output.
#
# The options restrict the log to first-parent commits and to added or
# renamed files (<tt>--diff-filter=AR</tt>), list each file with its status
# (<tt>--name-status</tt>), and print every commit header as
# <tt>GTN_GTN:</tt> followed by the committer timestamp (<tt>%ct</tt>).
#
# Returns:: String with the command's standard output.
def self.command
  history = %x(git log --first-parent --name-status --diff-filter=AR --pretty='GTN_GTN:%ct' )
  history
end
discover_caches() click to toggle source
# File _plugins/gtn/mod.rb, line 165
# Finds an on-disk publication-time cache file, if there is one.
#
# Returns:: the first path matching <tt>metadata/git-pub-*.txt</tt> (relative
#           to the current working directory), or +nil+ when nothing matches.
def self.discover_caches
  # Only one cache file is expected; if several are lying around we simply
  # use whichever one the glob lists first.
  pattern = 'metadata/git-pub-*.txt'
  matches = Dir.glob(pattern)
  matches.first
end
generate_cache() click to toggle source
# File _plugins/gtn/mod.rb, line 171
# Writes the publication-time cache file for the current tip of +main+.
#
# The file is named <tt>metadata/git-pub-<commit>.txt</tt>, where the commit is
# whatever <tt>git rev-list -n 1 main</tt> prints. When no cache exists yet the
# full output of +command+ is stored. Otherwise the output of
# +cached_command+ is collected first, the old cache file is deleted, and the
# collected output is written to the new file.
#
# Returns:: the result of +File.write+.
def self.generate_cache
  head_commit = `git rev-list -n 1 main`.strip
  destination = "metadata/git-pub-#{head_commit}.txt"

  # First run: nothing to build on, store the complete log.
  return File.write(destination, command) if discover_caches.nil?

  stale_cache = discover_caches
  # Must be read before the old file is removed, as it is based on that file.
  refreshed = cached_command
  File.delete(stale_cache)
  File.write(destination, refreshed)
end
init_cache() click to toggle source
# File _plugins/gtn/mod.rb, line 138
# Fills the publication-time cache from the git log, unless it has already
# been filled (the method returns immediately when the cache is not +nil+).
#
# The text from +cached_command+ is cut into one record per commit at every
# <tt>GTN_GTN:</tt> marker. Each record is split on blank lines into a
# timestamp and a tab-separated file listing; records without a listing are
# ignored, as are listing lines matching <tt>.png</tt>, <tt>.json</tt>,
# <tt>._ga</tt> or <tt>.jpg</tt>.
#
# * An +A+ line stores the commit time under the name returned by
#   +chase_rename+ for that path.
# * A line whose status starts with +R+ records old-name => new-name so that
#   later +A+ lines can be resolved to the newer name. This presumably relies
#   on git listing newer commits before older ones.
def self.init_cache
  return unless @@TIME_CACHE.nil?

  @@TIME_CACHE = {}
  renames = {}

  Jekyll.logger.info '[GTN/Time/Pub] Filling Publication Time Cache'
  commits = cached_command.split('GTN_GTN:').map { |record| record.split("\n\n") }

  commits.select { |parts| parts.length > 1 }.each do |timestamp, listing|
    entries = listing.split("\n").grep_v(/\.(png|json|_ga|jpg)/)

    entries.each do |entry|
      # "A\tpath" for additions, "R<score>\told\tnew" for renames.
      status, path, renamed_to = entry.split("\t")

      if status == 'A'
        # Attribute the add time to the name the file ended up with.
        @@TIME_CACHE[chase_rename(renames, path)] = Time.at(timestamp.to_i)
      elsif status[0] == 'R'
        # Point from the older name to the newer one.
        renames[path] = renamed_to
      end
    end
  end
end
obtain_time(f_unk) click to toggle source
# File _plugins/gtn/mod.rb, line 213
# Looks up the publication time of a file.
#
# f_unk:: Path to the file; it is normalised with +clean_path+ first.
#
# Returns:: the cached time when the cleaned path is in the cache. Otherwise
#           the file's mtime, which is stored in the cache and reported with
#           a warning. If obtaining or reporting the mtime raises a
#           +StandardError+, <tt>Time.at(0)</tt> is returned instead.
def self.obtain_time(f_unk)
  f = clean_path(f_unk)
  init_cache
  return @@TIME_CACHE[f] if @@TIME_CACHE.key? f

  begin
    # Not known to git: fall back to the file's mtime and remember it.
    @@TIME_CACHE[f] = File.mtime(f)
    Jekyll.logger.warn "[GTN/Time/Pub] No git cached time available for #{f}, defaulting to checkout"
    @@TIME_CACHE[f]
  rescue StandardError
    Time.at(0)
  end
end
time_cache() click to toggle source
# File _plugins/gtn/mod.rb, line 199
# Exposes the raw publication-time cache.
#
# Returns:: the current value of <tt>@@TIME_CACHE</tt>. After +init_cache+ has
#           run this is a Hash of path => Time; before that it is presumably
#           +nil+ (the initial value is set outside this method).
def self.time_cache
  @@TIME_CACHE
end