module TopicFilter

The main GTN module to parse tutorials and topics into useful lists of things that can be shown on topic pages

Public Class Methods

annotate_path(path, layout) click to toggle source

Annotation of a path with topic and tutorial information Params:

path

The path to annotate

Returns:

Hash

The annotation

Example:

/topics/assembly/tutorials/velvet-assembly/tutorial.md
=> {
  "topic" => "assembly",
  "topic_name" => "assembly",
  "material" => "assembly/velvet-assembly",
  "tutorial_name" => "velvet-assembly",
  "dir" => "topics/assembly/tutorials/velvet-assembly",
  "type" => "tutorial"
}
# File _plugins/jekyll-topic-filter.rb, line 249
# Annotate a path with topic and tutorial information.
#
# Params:
# +path+:: The path to annotate, e.g.
#          topics/<topic>/tutorials/<tutorial>/tutorial.md
#          (a leading "." component is ignored)
# +layout+:: The layout name of the page; used as a fallback to recognise
#            tutorials and slides whose file name does not match the usual
#            pattern
# Returns:
# +Hash+:: The annotation ('topic', 'topic_name', 'material',
#          'tutorial_name', 'dir', 'type' and, for symlinked directories,
#          'symlink'), or nil when the path is not a recognised material
#          resource.
def self.annotate_path(path, layout)
  parts = path.split('/')
  parts.shift if parts[0] == '.'

  # Only topics/<topic>/tutorials/<tutorial>/... can be a material.
  return nil if parts.length < 4
  return nil if parts[0] != 'topics' || parts[2] != 'tutorials'

  return nil if path.include?('/faqs/')

  # The dot is escaped so that only real "*.yaml" file names are skipped.
  return nil if parts[-1].match?(/data[_-](library|manager)\.yaml/)

  # parts[4] is absent when the path points at the tutorial directory itself.
  resource = parts[4].to_s
  type = if resource.match?(/tutorial.*\.md/) || layout == 'tutorial_hands_on'
           'tutorial'
         elsif resource.match?(/slides.*\.html/) ||
               %w[tutorial_slides base_slides introduction_slides].include?(layout)
           'slides'
         elsif resource.match?(/ipynb$/)
           'ipynb'
         elsif resource.match?(/Rmd$/)
           'rmd'
         elsif resource == 'workflows'
           'workflow'
         elsif resource == 'recordings'
           'recordings'
         elsif resource == 'tours'
           'tour'
         end

  # Anything else (index.md, images, ...) is not a material resource.
  return nil if type.nil?

  material = {
    'topic' => parts[1], # Duplicate of topic_name
    'topic_name' => parts[1],
    'material' => "#{parts[1]}/#{parts[3]}",
    'tutorial_name' => parts[3],
    'dir' => parts[0..3].join('/'),
  }

  # Check if it's a symlink
  material['symlink'] = true if File.symlink?(material['dir'])
  material['type'] = type

  material
end
cache() click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 34
# Lazily create, then reuse, the Jekyll cache named 'JekyllTopicFilter'.
def self.cache
  @@cache = Jekyll::Cache.new('JekyllTopicFilter') unless defined?(@@cache) && @@cache
  @@cache
end
collate_materials(site, pages) click to toggle source

Collate the materials into a large hash Params:

site

The Jekyll::Site object

pages

The list of pages to collate

Returns:

Hash

The collated materials

Example: collate_materials(site, pages)

> {

“assembly/velvet-assembly” => {

"topic" => "assembly",
"topic_name" => "assembly",
"material" => "assembly/velvet-assembly",
"tutorial_name" => "velvet-assembly",
"dir" => "topics/assembly/tutorials/velvet-assembly",
"resources" => [
  # each resource is a [type, page] pair, where page is the Jekyll page object
  ["slides", <page for /topics/assembly/tutorials/velvet-assembly/slides.html>],
  ["tutorial", <page for /topics/assembly/tutorials/velvet-assembly/tutorial.html>]
 ]
}
# File _plugins/jekyll-topic-filter.rb, line 352
# Group pages by the material (topic/tutorial) they belong to.
#
# Params:
# +site+:: The Jekyll::Site object (its 'shortlinks' data is read)
# +pages+:: The list of pages to collate
# Returns:
# +Hash+:: material key => annotation (without 'type') plus a 'resources'
#          array of [type, page] pairs.
#
# Side effect: every page that is annotated gets 'url', 'topic_name',
# 'tutorial_name', 'dir', 'short_id' and 'symlink' written into page.data.
def self.collate_materials(site, pages)
  # url => short id, so each page can look up its own short id
  short_id_for = site.data['shortlinks']['id'].invert

  pages.each_with_object({}) do |page, collated|
    # Skip anything outside of topics.
    next unless page.url.include?('/topics/')

    page.data['url'] = page.url
    meta = annotate_path(page.path, page.data['layout'])

    # Paths that cannot be annotated are not materials.
    next if meta.nil?

    key = meta['material']
    unless collated.key?(key)
      entry = meta.dup
      entry.delete('type') # the type belongs to a single resource, not the material
      entry['resources'] = []
      collated[key] = entry
    end

    page.data['topic_name'] = meta['topic_name']
    page.data['tutorial_name'] = meta['tutorial_name']
    page.data['dir'] = meta['dir']
    page.data['short_id'] = short_id_for[page.data['url']]
    page.data['symlink'] = meta['symlink']

    collated[key]['resources'].push([meta['type'], page])
  end
end
enumerate_topics(site) click to toggle source

This function returns a list of all the topics that are available. Params:

site

The Jekyll::Site object

Returns:

Array

The topic objects themselves

# File _plugins/jekyll-topic-filter.rb, line 30
# Return the topic objects (the values of list_topics_h) as an Array.
def self.enumerate_topics(site)
  list_topics_h(site).each_value.to_a
end
extract_workflow_tool_list(data) click to toggle source

Extract the list of tools used in a workflow Params:

data

The workflow data

Returns:

Array

The list of tool IDs

# File _plugins/jekyll-topic-filter.rb, line 224
# Extract the list of tools used in a workflow, including its subworkflows.
#
# Params:
# +data+:: The workflow data (a Hash with a 'steps' Hash)
# Returns:
# +Array+:: A flat list of tool IDs (may contain duplicates; steps without
#           a tool_id are skipped)
def self.extract_workflow_tool_list(data)
  steps = data['steps']

  direct = steps.select { |_k, v| v['type'] == 'tool' }.map { |_k, v| v['tool_id'] }.compact

  # flat_map keeps the result one level deep however deeply subworkflows nest,
  # so callers receive the documented flat list of IDs.
  nested = steps
           .select { |_k, v| v['type'] == 'subworkflow' }
           .flat_map { |_k, v| extract_workflow_tool_list(v['subworkflow']) }

  direct + nested
end
fetch_tutorial_material(site, topic_name, tutorial_name) click to toggle source

Fetch a specific tutorial material by topic and tutorial name Params:

site

The Jekyll::Site object

topic_name

The name of the topic

tutorial_name

The name of the tutorial

Returns:

Hash

The tutorial material

# File _plugins/jekyll-topic-filter.rb, line 205
# Look up a single material by topic and tutorial name.
#
# Returns nil when topic_name is nil, when the topic has no cached materials
# (a warning is logged in that case), or when no material in the topic has
# the requested tutorial_name.
def self.fetch_tutorial_material(site, topic_name, tutorial_name)
  return nil if topic_name.nil?

  fill_cache(site)
  topic_materials = site.data['cache_topic_filter'][topic_name]
  unless topic_materials.nil?
    return topic_materials.find { |candidate| candidate['tutorial_name'] == tutorial_name }
  end

  Jekyll.logger.warn "Cannot fetch tutorial material for #{topic_name}"
  nil
end
fill_cache(site) click to toggle source

Fill the cache with all the topics Params:

site

The Jekyll::Site object

Returns: nil

# File _plugins/jekyll-topic-filter.rb, line 44
# Populate site.data['cache_topic_filter'] (topic id => materials) once.
# Does nothing when the cache key is already present.
def self.fill_cache(site)
  return if site.data.key?('cache_topic_filter')

  Jekyll.logger.debug '[GTN/TopicFilter] Begin Cache Prefill'
  # The (empty) cache is stored before it is filled, so the key exists while
  # the per-topic materials are being computed.
  per_topic = (site.data['cache_topic_filter'] = {})

  list_topics(site).each { |topic_id| per_topic[topic_id] = filter_by_topic(site, topic_id) }
  Jekyll.logger.debug '[GTN/TopicFilter] End Cache Prefill'
end
filter_by_tag(site, topic_name) click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 860
# Select the materials that belong to the topic +topic_name+ or that carry
# +topic_name+ as a tag, sorted by priority (default priority: 1).
#
# Params:
# +site+:: The Jekyll::Site object
# +topic_name+:: The topic ID / tag to look for
# Returns:
# +Array+:: The matching materials; each material appears at most once,
#           even when it matches both by topic and by tag. An error is
#           logged when nothing matches.
def self.filter_by_tag(site, topic_name)
  # Here we make a (cached) call to load materials into memory and sort them
  # properly.
  materials = process_pages(site, site.pages)

  by_topic = materials.select { |m| m['topic_name'] == topic_name }
  # Materials already selected by topic are excluded here so they are not
  # listed twice.
  by_tag_only = materials.select do |m|
    m['topic_name'] != topic_name && (m['tags'] || []).include?(topic_name)
  end

  # Ordering relies purely on prioritisation.
  resource_pages = (by_topic + by_tag_only).sort_by { |m| m.fetch('priority', 1) }

  Jekyll.logger.error "Error? Could not find any relevant tagged pages for #{topic_name}" if resource_pages.empty?

  resource_pages
end
filter_by_topic(site, topic_name) click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 839
# Select the materials of a topic, sorted by priority (default priority: 1).
#
# Materials are matched on 'topic_name' first; only when that yields nothing
# are materials tagged with +topic_name+ used instead. An error is logged
# when the result is empty.
def self.filter_by_topic(site, topic_name)
  # (cached) load of every material
  all_materials = process_pages(site, site.pages)

  matching = all_materials.select { |m| m['topic_name'] == topic_name }
  if matching.empty?
    # Nothing with that topic name: try generating the list from tags.
    matching = all_materials.select { |m| (m['tags'] || []).include?(topic_name) }
  end

  # Ordering relies purely on prioritisation.
  ordered = matching.sort_by { |m| m.fetch('priority', 1) }
  return ordered unless ordered.empty?

  Jekyll.logger.error "Error? Could not find any relevant pages for #{topic_name}"
  ordered
end
filter_by_topic_subtopic(site, topic_name, subtopic_id) click to toggle source

Filter a list of materials by topic and subtopic.

# File _plugins/jekyll-topic-filter.rb, line 881
# Select the materials of a topic whose 'subtopic' equals +subtopic_id+.
# An error is logged when nothing matches.
def self.filter_by_topic_subtopic(site, topic_name, subtopic_id)
  in_subtopic = filter_by_topic(site, topic_name).select do |material|
    material['subtopic'] == subtopic_id
  end
  return in_subtopic unless in_subtopic.empty?

  Jekyll.logger.error "Error? Could not find any relevant pages for #{topic_name} / #{subtopic_id}"
  in_subtopic
end
get_posts(site) click to toggle source

Get the list of posts from the site Params:

site

The Jekyll::Site object

Returns:

Array

The list of posts

This is a transition-period function that can be removed later. It exists because, with the Jekyll version we’re using, site.posts behaves differently between prod and dev (_config-dev.yml) modes: in prod, accessing site.posts.docs works, while in dev mode site.posts claims to be an Array (rather than, presumably, a ‘posts’ object with a docs method). So we check whether it responds to docs and use that; otherwise site.posts itself should be iterable.

# File _plugins/jekyll-topic-filter.rb, line 311
# Return the list of posts, whether site.posts is a collection-like object
# exposing #docs or already a plain iterable (transition-period helper).
def self.get_posts(site)
  posts = site.posts
  posts.respond_to?(:docs) ? posts.docs : posts
end
get_version(tool) click to toggle source

Get the version of a tool. Parameters:

tool

A tool string

Returns:

String

The version of the tool.

Examples: get_version(“toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regex1/1.0.0”) => “1.0.0”

# File _plugins/jekyll-topic-filter.rb, line 936
# Return the version (last path segment) of a tool ID that contains more
# than four slashes; any other tool string is returned unchanged.
def self.get_version(tool)
  return tool unless tool.count('/') > 4

  tool.split('/').last
end
git_log(wf_path) click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 528
# Return the git history of +wf_path+ as a list of hashes with the keys
# 'hash', 'unix', 'message', 'short_hash' and 'num', in the order git prints
# them; 'num' counts down from the number of commits to 1.
#
# Outside of the 'production' Jekyll environment this returns [] without
# calling git. Results are stored in the module cache keyed by the path.
def self.git_log(wf_path)
  return [] if Jekyll.env != 'production'

  cache.getset(wf_path) do
    require 'shellwords'

    raw = %x[git log --format="%H %at %s" #{Shellwords.escape(wf_path)}]
    entries = raw.split("\n").map do |line|
      # "<hash> <unix timestamp> <subject>"; the subject may contain spaces
      sha, timestamp, subject = line.split(' ', 3)
      { 'hash' => sha, 'unix' => timestamp, 'message' => subject, 'short_hash' => sha[0..8] }
    end

    total = entries.length
    entries.each_with_index.map do |entry, position|
      entry['num'] = total - position
      entry
    end
  end
end
graph_dot(wf) click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 462
# Render a workflow as a Graphviz DOT digraph.
#
# Params:
# +wf+:: The parsed workflow (a Hash with a 'steps' Hash). Workflow outputs
#        that have a label but no 'uuid' get a random uuid written back
#        into +wf+.
# Returns:
# +String+:: The DOT source, e.g.
#
#   digraph main {
#     0[color=lightblue,label="ℹ️ Input Dataset\nBionano_dataset"]
#     7[label="Busco"]
#     4 -> 7 [label="out_fa"]
#   }
def self.graph_dot(wf)
  statements = [
    'node [fontname="Atkinson Hyperlegible", shape=box, color=white,style=filled,color=peachpuff,margin="0.2,0.2"];',
    'edge [fontname="Atkinson Hyperlegible"];',
  ]
  wf['steps'].each do |id, step|
    chosen_label = mermaid_safe_label(step['label'] || step['name'])

    case step['type']
    when 'data_collection_input'
      statements << "#{id}[color=lightblue,label=\"ℹ️ Input Collection\\n#{chosen_label}\"]"
    when 'data_input'
      statements << "#{id}[color=lightblue,label=\"ℹ️ Input Dataset\\n#{chosen_label}\"]"
    when 'parameter_input'
      statements << "#{id}[color=lightgreen,label=\"ℹ️ Input Parameter\\n#{chosen_label}\"]"
    when 'subworkflow'
      statements << "#{id}[color=lightcoral,label=\"🛠️ Subworkflow\\n#{chosen_label}\"]"
    else
      statements << "#{id}[label=\"#{chosen_label}\"]"
    end

    # A step without the key is treated as having no incoming edges.
    (step['input_connections'] || {}).each_value do |sources|
      # One input is fed either by a single connection or by a list of them.
      sources = [sources] unless sources.is_a?(Array)
      sources.each do |source|
        statements << "#{source['id']} -> #{id} [label=\"#{mermaid_safe_label(source['output_name'])}\"]"
      end
    end

    (step['workflow_outputs'] || []).each do |wo|
      next if wo['label'].nil?

      wo['uuid'] = SecureRandom.uuid.to_s if wo['uuid'].nil?
      # The "k" prefix and removed dashes turn the uuid into a DOT identifier.
      node = "k#{wo['uuid'].gsub('-', '')}"
      # Sanitised like every other label so a quote cannot end the DOT string.
      statements << "#{node}[color=lightseagreen,label=\"Output\\n#{mermaid_safe_label(wo['label'])}\"]"
      statements << "#{id} -> #{node}"
    end
  end

  "digraph main {\n#{statements.map { |q| "  #{q}" }.join("\n")}\n}"
end
identify_contributors(materials, site) click to toggle source

Get a list of contributors for a list of materials Parameters:

materials

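A hash of section id => section, where each section has a "materials" array (for example the result of list_materials_structured)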

Returns:

Array

An array of individual contributors as strings.

# File _plugins/jekyll-topic-filter.rb, line 900
# Collect the distinct individual contributors of a set of materials.
#
# +materials+ is iterated as key/value pairs and each value's 'materials'
# list is used. Only contributors for which Gtn::Contributors.person? is
# true are kept. The result is de-duplicated and returned in random order.
def self.identify_contributors(materials, site)
  # The flatten merges the per-section material lists into one list.
  all_materials = materials.map { |_section_id, section| section['materials'] }.flatten

  people = all_materials
           .map { |material| Gtn::Contributors.get_contributors(material) }
           .flatten
           .select { |contributor| Gtn::Contributors.person?(site, contributor) }

  people.uniq.shuffle
end
identify_funders_and_grants(materials, site) click to toggle source

Get a list of funders for a list of materials Parameters:

materials

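A hash of section id => section, where each section has a "materials" array (for example the result of list_materials_structured)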

Returns:

Array

An array of funder (organisations that provided support) IDs as strings.

# File _plugins/jekyll-topic-filter.rb, line 917
# Collect the distinct funding entries of a set of materials.
#
# +materials+ is iterated as key/value pairs and each value's 'materials'
# list is passed to Gtn::Contributors.get_all_funding. The result is
# de-duplicated and returned in random order.
def self.identify_funders_and_grants(materials, site)
  # The flatten merges the per-section material lists into one list.
  all_materials = materials.map { |_section_id, section| section['materials'] }.flatten

  funding = all_materials
            .map { |material| Gtn::Contributors.get_all_funding(site, material) }
            .flatten

  funding.uniq.shuffle
end
list_all_materials(site) click to toggle source

This is a helper function to get all the materials in a site.

# File _plugins/jekyll-topic-filter.rb, line 814
# Return every material of the site (the result of process_pages over all
# site pages).
def self.list_all_materials(site)
  every_page = site.pages
  process_pages(site, every_page)
end
list_all_tags(site) click to toggle source

List every tag used across all materials. This is used to generate the tag cloud.

Parameters:

site

The Jekyll::Site object, used to get the list of pages.

Returns:

Array

An array of strings, each string is a tag. (sorted and unique)

# File _plugins/jekyll-topic-filter.rb, line 834
# List every tag used across all materials, together with every topic ID,
# sorted and without duplicates.
def self.list_all_tags(site)
  material_tags = process_pages(site, site.pages).map { |material| material['tags'] || [] }.flatten
  topic_ids = list_topics(site)

  (material_tags + topic_ids).sort.uniq
end
list_materials_by_tool(site) click to toggle source

List materials by tool Parameters:

site

The Jekyll::Site object, used to get the list of pages.

Returns:

Hash

A hash of tool_id => {

"tool_id" => [tool_id, version],
"tutorials" => [tutorial_id, tutorial_title, topic_title, tutorial_url]

}

# File _plugins/jekyll-topic-filter.rb, line 970
# Index the materials by the tools they use.
#
# Returns a Hash of short tool id => {
#   'tool_id'   => [[full tool id, version], ...] (unique, descending by
#                  version string),
#   'tutorials' => [[material id, title, topic title, url], ...] (unique,
#                  sorted)
# }, ordered with the tool used by the most tutorials first.
def self.list_materials_by_tool(site)
  by_tool = {}

  list_all_materials(site).each do |material|
    material.fetch('tools', []).each do |tool|
      entry = (by_tool[short_tool(tool)] ||= { 'tool_id' => [], 'tutorials' => [] })

      entry['tool_id'].push([tool, get_version(tool)])
      entry['tutorials'].push(
        [material['id'], material['title'], site.data[material['topic_name']]['title'], material['url']]
      )
    end
  end

  # Uniqueify/sort each entry in place
  by_tool.each_value do |entry|
    entry['tool_id'].uniq!
    entry['tool_id'].sort_by! { |_full_id, version| version }
    entry['tool_id'].reverse!

    entry['tutorials'].uniq!
    entry['tutorials'].sort!
  end

  # Order by most popular tool
  by_tool.sort_by { |_short_id, entry| entry['tutorials'].length }.reverse.to_h
end
list_materials_structured(site, topic_name) click to toggle source

This function returns a list of all the materials that are available for a specific topic, but this time in a structured manner Params:

site

The Jekyll::Site object

topic_name

The name of the topic

Returns:

Hash

The subtopics and their materials

Example:

{
 "intro" => {
   "subtopic" => {"title" => "Introduction", "description" => "Introduction to the topic", "id" => "intro"},
   "materials" => [
     ...
   ]
 },
 "__OTHER__" => {
   "subtopic" => {"title" => "Other", "description" => "Assorted Tutorials", "id" => "other"},
   "materials" => [.. ]
 }
}
# File _plugins/jekyll-topic-filter.rb, line 91
# Return the materials of a topic structured into sections.
#
# Instead of returning a flat list of tutorials, they are grouped by
# subtopic (if the topic defines subtopics) or, for tag-based topics without
# subtopics, by the topic each material originally belongs to. Materials
# that fit no section end up in '__OTHER__' (omitted when empty). Topics
# without any structure are returned as a single '__FLAT__' section.
#
# Params:
# +site+:: The Jekyll::Site object
# +topic_name+:: The name of the topic (a key of site.data)
# Returns:
# +Hash+:: section key => { 'subtopic' => ..., 'materials' => [...] }, with
#          every materials list sorted by [priority, title]
def self.list_materials_structured(site, topic_name)
  fill_cache(site)

  topic = site.data[topic_name]
  tag_based = topic['tag_based']
  has_subtopics = topic.key?('subtopics')

  out = {}
  # IDs of every material already placed in a section (Hash used as a set).
  seen_ids = {}
  add_section = lambda do |key, subtopic, section_materials|
    out[key] = { 'subtopic' => subtopic, 'materials' => section_materials }
    section_materials.each { |m| seen_ids[m['id']] = true }
  end
  # All materials of the topic, from which the '__OTHER__' section is
  # derived; stays nil for the flat layout.
  candidates = nil

  if tag_based.nil? && has_subtopics
    # Regular topic with subtopics: subtopic => tutorials
    topic['subtopics'].each do |subtopic|
      add_section.call(subtopic['id'], subtopic, filter_by_topic_subtopic(site, topic_name, subtopic['id']))
    end
    candidates = filter_by_topic(site, topic_name)
  elsif tag_based
    # Tag based topic: fetch the tagged materials once and reuse them.
    materials = filter_by_tag(site, topic_name.gsub('by_tag_', ''))

    if has_subtopics
      # The subtopic ids are themselves tags to look for.
      topic['subtopics'].each do |subtopic|
        add_section.call(subtopic['id'], subtopic,
                         materials.select { |x| (x['tags'] || []).include?(subtopic['id']) })
      end
    else
      # Treat the parent topics of the materials like (fake) subtopics.
      materials.map { |x| x['topic_name'] }.uniq.sort.each do |parent_topic|
        fake_subtopic = { 'id' => parent_topic, 'title' => site.data[parent_topic]['title'], 'description' => nil }
        add_section.call(parent_topic, fake_subtopic, materials.select { |x| x['topic_name'] == parent_topic })
      end
    end
    candidates = materials
  else
    # Or just the list (Jury is still out on this one, should it really be a
    # flat list? Or in this identical structure.)
    out['__FLAT__'] = { 'subtopic' => nil, 'materials' => filter_by_topic(site, topic_name) }
  end

  unless candidates.nil?
    leftovers = candidates.reject { |x| seen_ids.key?(x['id']) }
    # The catch-all section only exists when something fell through.
    unless leftovers.empty?
      out['__OTHER__'] = {
        'subtopic' => { 'title' => 'Other', 'description' => 'Assorted Tutorials', 'id' => 'other' },
        'materials' => leftovers
      }
    end
  end

  out.each_value do |section|
    section['materials'].sort_by! { |m| [m.fetch('priority', 1), m['title']] }
  end

  out
end
list_topics(site) click to toggle source

This function returns a list of all the topics that are available. Params:

site

The Jekyll::Site object

Returns:

Array

The list of topics

# File _plugins/jekyll-topic-filter.rb, line 16
# Return the IDs of all topics (the keys of list_topics_h) as an Array.
def self.list_topics(site)
  list_topics_h(site).each_key.to_a
end
list_topics_h(site) click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 20
# Return the entries of site.data that are topics: Hash values that have an
# 'editorial_board' key.
def self.list_topics_h(site)
  site.data.select do |_topic_id, metadata|
    next false unless metadata.is_a?(Hash)

    metadata.key?('editorial_board')
  end
end
list_videos(site) click to toggle source

This is a helper function to get all the materials in a site that have a video (i.e. whose video attribute is true).

# File _plugins/jekyll-topic-filter.rb, line 820
# Return only the materials whose 'video' attribute is exactly true.
def self.list_videos(site)
  process_pages(site, site.pages).select { |material| material['video'] == true }
end
mermaid(wf) click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 398
# Render a workflow as a Mermaid.js flowchart.
#
# Params:
# +wf+:: The parsed workflow (a Hash with a 'steps' Hash). Workflow outputs
#        that have a label but no 'uuid' get a random uuid written back
#        into +wf+.
# Returns:
# +String+:: The Mermaid source, e.g.
#
#   flowchart TD
#     0["ℹ️ Input Dataset\nreads"];
#     style 0 stroke:#2c3143,stroke-width:4px;
#     1["Busco"];
#     0 -->|output| 1;
def self.mermaid(wf)
  statements = []
  wf['steps'].each do |id, step|
    chosen_label = mermaid_safe_label(step['label'] || step['name'])

    # Node declaration, followed by its styling for the special step types.
    case step['type']
    when 'data_collection_input'
      statements << "#{id}[\"ℹ️ Input Collection\\n#{chosen_label}\"];"
      statements << "style #{id} stroke:#2c3143,stroke-width:4px;"
    when 'data_input'
      statements << "#{id}[\"ℹ️ Input Dataset\\n#{chosen_label}\"];"
      statements << "style #{id} stroke:#2c3143,stroke-width:4px;"
    when 'parameter_input'
      statements << "#{id}[\"ℹ️ Input Parameter\\n#{chosen_label}\"];"
      statements << "style #{id} fill:#ded,stroke:#393,stroke-width:4px;"
    when 'subworkflow'
      statements << "#{id}[\"🛠️ Subworkflow\\n#{chosen_label}\"];"
      statements << "style #{id} fill:#edd,stroke:#900,stroke-width:4px;"
    else
      statements << "#{id}[\"#{chosen_label}\"];"
    end

    # A step without the key is treated as having no incoming edges.
    (step['input_connections'] || {}).each_value do |sources|
      # One input is fed either by a single connection or by a list of them.
      sources = [sources] unless sources.is_a?(Array)
      sources.each do |source|
        statements << "#{source['id']} -->|#{mermaid_safe_label(source['output_name'])}| #{id};"
      end
    end

    (step['workflow_outputs'] || []).each do |wo|
      next if wo['label'].nil?

      wo['uuid'] = SecureRandom.uuid.to_s if wo['uuid'].nil?
      # Sanitised like every other label so a quote cannot end the node text.
      statements << "#{wo['uuid']}[\"Output\\n#{mermaid_safe_label(wo['label'])}\"];"
      statements << "#{id} --> #{wo['uuid']};"
      statements << "style #{wo['uuid']} stroke:#2c3143,stroke-width:4px;"
    end
  end

  "flowchart TD\n#{statements.map { |q| "  #{q}" }.join("\n")}"
end
mermaid_safe_label(label) click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 390
# Make a label safe to embed in a quoted diagram node: round and square
# brackets are removed and straight quotes are replaced by typographic ones
# (accepted as not perfectly correct). A nil label becomes ''.
def self.mermaid_safe_label(label)
  text = label || ''
  text.delete('()[]').tr(%q("'), '”’')
end
process_pages(site, pages) click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 769
# Turn the pages of the site into the list of resolved materials, and
# register the short-link redirects on the pages and posts they point at.
#
# The result is memoised in site.data['cache_processed_pages']; later calls
# return that cached list without doing any work.
#
# Params:
# +site+:: The Jekyll::Site object
# +pages+:: The pages to process
# Returns:
# +Array+:: The materials (one per collated material)
def self.process_pages(site, pages)
  # eww.
  return site.data['cache_processed_pages'] if site.data.key?('cache_processed_pages')

  materials = collate_materials(site, pages).map { |_k, v| resolve_material(site, v) }
  Jekyll.logger.info '[GTN/TopicFilter] Filling Materials Cache'
  site.data['cache_processed_pages'] = materials

  # Prepare short URLs: target url => ["/short/<id>", ...]
  mappings = Hash.new { |h, k| h[k] = [] }
  site.data['shortlinks'].each_value do |links|
    links.each { |short_id, url| mappings[url].push("/short/#{short_id}") }
  end

  # Appends the short links of +url+ to the redirect list in +data+.
  add_redirects = lambda do |data, url|
    data['redirect_from'] = [] unless data.key?('redirect_from')
    data['redirect_from'].push(*mappings[url])
    data['redirect_from'].uniq!
  end

  # Update the materials with their redirects. key? is a direct hash lookup
  # and, unlike mappings[url], never creates an entry for an unknown url.
  pages.each do |p|
    next unless mappings.key?(p.url)

    # When the page carries a 'ref', the redirects go onto the referenced page.
    add_redirects.call(p['ref'] ? p['ref'].data : p.data, p.url)
  end

  # Same for news
  get_posts(site).each do |p|
    add_redirects.call(p.data, p.url) if mappings.key?(p.url)
  end

  materials
end
resolve_material(site, material) click to toggle source
# File _plugins/jekyll-topic-filter.rb, line 548
# Turn one collated material (see collate_materials) into a single
# "material" hash used by the rest of the site.
#
# Most variables are copied from the tutorial page, or from the slides when
# no tutorial is available; this means external_slides AND external_handson
# cannot both be supported. The result is then enriched with modification
# and publication information, quizzes, workflows, tool lists, supported
# servers and installation data.
#
# Params:
# +site+:: The Jekyll::Site object
# +material+:: A collated material with 'resources' ([type, page] pairs)
#              and 'dir'
# Returns:
# +Hash+:: The material, or {} (after logging an error) when it has neither
#          slides nor a tutorial
def self.resolve_material(site, material)
  tutorials = material['resources'].select { |a| a[0] == 'tutorial' }
  slides    = material['resources'].select { |a| a[0] == 'slides' }
  # annotate_path labels these resources with the singular 'tour'.
  tours     = material['resources'].select { |a| a[0] == 'tour' }

  # Our final "page" object (a "material")
  page = nil

  slide_has_video = false
  slide_has_recordings = false
  slide_translations = []
  page_ref = nil

  if slides.length.positive?
    page = slides.min { |a, b| a[1].path <=> b[1].path }[1]
    slide_has_video = page.data.fetch('video', false)
    slide_has_recordings = page.data.fetch('recordings', false)
    slide_translations = page.data.fetch('translations', [])
    page_ref = page
  end

  # No matter if there were slides, we override with tutorials if present.
  tutorial_translations = []
  if tutorials.length.positive?
    page = tutorials.min { |a, b| a[1].path <=> b[1].path }[1]
    tutorial_translations = page.data.fetch('translations', [])
    page_ref = page
  end

  if page.nil?
    Jekyll.logger.error '[GTN/TopicFilter] Could not process material'
    return {}
  end

  # Otherwise clone the metadata from it which works well enough.
  page_obj = page.data.dup
  page_obj['id'] = "#{page['topic_name']}/#{page['tutorial_name']}"
  page_obj['ref'] = page_ref
  page_obj['ref_tutorials'] = tutorials.map { |a| a[1] }
  page_obj['ref_slides'] = slides.map { |a| a[1] }

  # Sometimes `hands_on` is set to something like `external`, in which
  # case it is important to not override it. So we only do that if the
  # key isn't already set; it then reflects whether a tutorial is present.
  page_obj['hands_on'] = tutorials.length.positive? if !page_obj.key?('hands_on')

  # Same for slides, if there's a resource by that name, we can
  # automatically set `slides: true`
  page_obj['slides'] = slides.length.positive? if !page_obj.key?('slides')

  all_resources = slides + tutorials
  page_obj['mod_date'] = all_resources
                         .map { |p| Gtn::ModificationTimes.obtain_time(p[1].path) }
                         .max

  page_obj['pub_date'] = all_resources
                         .map { |p| Gtn::PublicationTimes.obtain_time(p[1].path) }
                         .min

  page_obj['version'] = all_resources
                        .map { |p| Gtn::ModificationTimes.obtain_modification_count(p[1].path) }
                        .max

  folder = material['dir']

  ymls = Dir.glob("#{folder}/quiz/*.yml") + Dir.glob("#{folder}/quiz/*.yaml")
  if ymls.length.positive?
    quizzes = ymls.map { |a| a.split('/')[-1] }
    page_obj['quiz'] = quizzes.map do |q|
      quiz_data = YAML.load_file("#{folder}/quiz/#{q}")
      {
        'id' => q,
        'path' => "#{folder}/quiz/#{q}",
        'title' => quiz_data['title'],
        'contributors' => quiz_data['contributors'],
      }
    end
  end

  # In dev configuration, this breaks for me. Not sure why config isn't available.
  domain = if !site.config.nil? && site.config.key?('url')
             "#{site.config['url']}#{site.config['baseurl']}"
           else
             'http://localhost:4000/training-material/'
           end
  # Similar as above.
  workflows = Dir.glob("#{folder}/workflows/*.ga") # TODO: support gxformat2
  if workflows.length.positive?
    workflow_names = workflows.map { |a| a.split('/')[-1] }
    page_obj['workflows'] = workflow_names.map do |wf|
      wfid = "#{page['topic_name']}-#{page['tutorial_name']}"
      # NOTE(review): /.ga/ is neither escaped nor anchored, so it also strips
      # any other "<char>ga" sequence from the name. Left untouched because
      # wfname is part of the TRS endpoint below; confirm before changing.
      wfname = wf.gsub(/.ga/, '').downcase.gsub(/[^a-z0-9]/, '-')
      trs = "api/ga4gh/trs/v2/tools/#{wfid}/versions/#{wfname}"
      wf_path = "#{folder}/workflows/#{wf}"
      wf_json = JSON.parse(File.read(wf_path))
      license = wf_json['license']
      creators = wf_json['creator'] || []
      wftitle = wf_json['name']

      # /galaxy-intro-101-workflow.eu.json
      workflow_test_results = Dir.glob(wf_path.gsub(/\.ga$/, '.*.json'))
      workflow_test_outputs = {}
      workflow_test_results.each do |test_result|
        # Key each result by the server code found in its OWN file name.
        server = test_result.match(/\.(..)\.json$/)
        # Files without a two-character server code are not test results.
        next if server.nil?

        workflow_test_outputs[server[1]] = JSON.parse(File.read(test_result))
      end
      workflow_test_outputs = nil if workflow_test_outputs.empty?

      wfhkey = [page['topic_name'], page['tutorial_name'], wfname].join('/')

      {
        'workflow' => wf,
        # Only the trailing ".ga" extension is swapped for the test suffix.
        'tests' => Dir.glob("#{folder}/workflows/" + wf.gsub(/\.ga$/, '-test*')).length.positive?,
        'url' => "#{domain}/#{folder}/workflows/#{wf}",
        'url_html' => "#{domain}/#{folder}/workflows/#{wf.gsub(/\.ga$/, '.html')}",
        'path' => wf_path,
        'wfid' => wfid,
        'wfname' => wfname,
        'trs_endpoint' => "#{domain}/#{trs}",
        'license' => license,
        'parent_id' => page_obj['id'],
        'topic_id' => page['topic_name'],
        'tutorial_id' => page['tutorial_name'],
        'creators' => creators,
        'name' => wf_json['name'],
        'title' => wftitle,
        'version' => Gtn::ModificationTimes.obtain_modification_count(wf_path),
        'description' => wf_json['annotation'],
        'tags' => wf_json['tags'],
        'features' => {
          'report' => wf_json['report'],
          'subworkflows' => wf_json['steps'].map { |_, x| x['type'] }.any? { |x| x == 'subworkflow' },
          'comments' => (wf_json['comments'] || []).length.positive?,
          'parameters' => wf_json['steps'].map { |_, x| x['type'] }.any? { |x| x == 'parameter_input' },
        },
        'workflowhub_id' => (site.data['workflowhub'] || {}).fetch(wfhkey, nil),
        'history' => git_log(wf_path),
        'test_results' => workflow_test_outputs,
        'modified' => File.mtime(wf_path),
        'mermaid' => mermaid(wf_json),
        'graph_dot' => graph_dot(wf_json),
        'workflow_tools' => extract_workflow_tool_list(wf_json).flatten.uniq.sort,
        'inputs' => wf_json['steps'].select { |_k, v| ['data_input', 'data_collection_input', 'parameter_input'].include? v['type'] }.map { |_, v| v },
        'outputs' => wf_json['steps'].select { |_k, v| v['workflow_outputs'] && v['workflow_outputs'].length.positive? }.map { |_, v| v },
      }
    end
  end

  # Really only used for tool list install for ephemeris, not general.
  page_obj['api'] = "#{domain}/api/topics/#{page['topic_name']}/tutorials/#{page['tutorial_name']}/tutorial.json"

  # Tool List
  #
  # This is exposed in the GTN API to help admins/devs easily get the tool
  # list for installation.
  page_obj['tools'] = []
  page_obj['tools'] += page.content.scan(/{% tool \[[^\]]*\]\(([^)]*)\)\s*%}/) if page_obj['hands_on']

  page_obj['workflows']&.each do |wf|
    page_obj['tools'] += wf['workflow_tools']
  end
  page_obj['tools'] = page_obj['tools'].flatten.sort.uniq

  topic = site.data[page_obj['topic_name']]
  # Server support is only calculated for 'use' and 'basics' topics.
  server_support_relevant = topic['type'] == 'use' || topic['type'] == 'basics'
  page_obj['supported_servers'] = if server_support_relevant
                                    Gtn::Supported.calculate(site.data['public-server-tools'], page_obj['tools'])
                                  else
                                    []
                                  end

  page_obj['supported_servers_matrix'] = if server_support_relevant
                                           Gtn::Supported.calculate_matrix(site.data['public-server-tools'],
                                                                           page_obj['tools'])
                                         else
                                           []
                                         end

  topic_name_human = topic['title']
  page_obj['topic_name_human'] = topic_name_human # TODO: rename 'topic_name' and 'topic_name' to 'topic_id'
  admin_install = Gtn::Toolshed.format_admin_install(site.data['toolshed-revisions'], page_obj['tools'],
                                                     topic_name_human, site.data['toolcats'])
  page_obj['admin_install'] = admin_install
  page_obj['admin_install_yaml'] = admin_install.to_yaml

  page_obj['tours'] = tours.length.positive?
  page_obj['video'] = slide_has_video
  page_obj['slides_recordings'] = slide_has_recordings
  page_obj['translations'] = {}
  page_obj['translations']['tutorial'] = tutorial_translations
  page_obj['translations']['slides'] = slide_translations
  page_obj['translations']['video'] = slide_has_video # Just demand it?
  page_obj['license'] = 'CC-BY-4.0' if page_obj['license'].nil?
  # I feel less certain about this override, but it works well enough in
  # practice, and I did not find any examples of `type: <anything other
  # than tutorial>` in topics/*/tutorials/*/tutorial.md but that doesn't
  # make it future proof.
  page_obj['type'] = 'tutorial'

  if page_obj.key?('draft') && page_obj['draft']
    page_obj['tags'] = [] if !page_obj.key? 'tags'
    page_obj['tags'].push('work-in-progress')
  end

  page_obj
end
short_tool(tool) click to toggle source

Get a short version of a tool. Parameters:

tool

A tool string

Returns:

String

The short version of the tool.

Examples: short_tool(“toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regex1/1.0.0”) => “galaxyp/regex1”

# File _plugins/jekyll-topic-filter.rb, line 953
# Return "<owner>/<tool>" (path segments 2 and 4) for a tool ID that
# contains more than four slashes; any other tool string is returned
# unchanged.
def self.short_tool(tool)
  return tool if tool.count('/') <= 4

  segments = tool.split('/')
  "#{segments[2]}/#{segments[4]}"
end
topic_filter(site, topic_name) click to toggle source

This function returns a list of all the materials that are available for a specific topic. Params:

site

The Jekyll::Site object

topic_name

The name of the topic

Returns:

Array

The list of materials

# File _plugins/jekyll-topic-filter.rb, line 64
# Return the cached list of materials for +topic_name+ (nil when the topic
# is not in the cache), filling the cache first if needed.
def self.topic_filter(site, topic_name)
  fill_cache(site)
  cached_by_topic = site.data['cache_topic_filter']
  cached_by_topic[topic_name]
end