class Object
Constants
- APPROVED_VOICES
- ARI_MAP
- BASE_REF
- CONTRIBUTORS
Full Contributors Data
- CONTRIBUTORS_SCHEMA
- CONTRIBUTORS_SCHEMA_UNSAFE
Any error messages
- END_OF_SENTENCE_DURATION
- END_OF_SLIDE_DURATION
- FASTIMAGE_AVAILABLE
The fastimage gem may not be installed; that is OK, we will fall back to another method.
- FEED_WIDGET_XSLT
- GALAXIES
- GRANTS
- GRANTS_SCHEMA
- GRANTS_SCHEMA_UNSAFE
- GTN_CACHE
- GTN_HOME
- ICON_FOR
- NOW
modified_files = `git diff --cached --name-only --ignore-all-space --diff-filter=M #{options}`.split("\n")
- ORGANISATIONS
- ORGANISATIONS_SCHEMA
- ORGANISATIONS_SCHEMA_UNSAFE
- OUR_PATH
- PRIO
- PUNCTUATION
- ROOT_PATH
two directories up
- TEST_PHRASES
- TITLE_TOPIC
- TRACKING
- TUTO_ID
- WORD_MAP
Public Class Methods
We have the problem of renamed FAQs: these will be stored somewhere other than under the auto-generated name.
We need to eliminate that duplication and ensure we don't overwrite an existing one.
So we first discover all existing FAQs and their Google Form IDs.
# File bin/google-form-faq.rb, line 21
def self.discover_faqs
  paths = []
  paths += Dir.glob('faqs/**/*.md')
  paths += Dir.glob('topics/**/faqs/*.md')

  # Reject symlinks
  paths.reject { |path| File.symlink?(path) }
end
Public Instance Methods
# File _plugins/feeds.rb, line 218
def all_date_sorted_materials(site)
  events = site.pages.select { |x| x['layout'] == 'event' || x['layout'] == 'event-external' }
  materials = TopicFilter.list_all_materials(site).reject { |k, _v| k['draft'] }
  news = site.posts.select { |x| x['layout'] == 'news' }
  faqs = site.pages.select { |x| x['layout'] == 'faq' }
  pathways = site.pages.select { |x| x['layout'] == 'learning-pathway' }
  workflows = Dir.glob('topics/**/*.ga')

  bucket = events.map do |e|
    [Gtn::PublicationTimes.obtain_time(e.path).to_datetime, 'events', e, ['event'] + e.data.fetch('tags', [])]
  end

  materials.each do |m|
    tags = [m['topic_name']] + (m['tags'] || [])
    m.fetch('ref_tutorials', []).map do |t|
      bucket << [Gtn::PublicationTimes.obtain_time(t.path).to_datetime, 'tutorials', t, tags]

      (t['recordings'] || []).map do |r|
        url = '/' + t.path.gsub(/tutorial(_[A-Z_]*)?.(html|md)$/, 'recordings/')
        url += "#tutorial-recording-#{Date.parse(r['date']).strftime('%-d-%B-%Y').downcase}"
        attr = { 'title' => "Recording of " + t['title'],
                 'contributors' => r['speakers'] + (r['captions'] || []),
                 'content' => "A #{r['length']} long recording is now available." }
        obj = objectify(attr, url, t.path)
        bucket << [DateTime.parse(r['date'].to_s), 'recordings', obj, tags]
      end
    end

    m.fetch('ref_slides', []).reject { |s| s.url =~ /-plain.html/ }.map do |s|
      bucket << [Gtn::PublicationTimes.obtain_time(s.path).to_datetime, 'slides', s, tags]

      (s['recordings'] || []).map do |r|
        url = '/' + s.path.gsub(/slides(_[A-Z_]*)?.(html|md)$/, 'recordings/')
        url += "#tutorial-recording-#{Date.parse(r['date']).strftime('%-d-%B-%Y').downcase}"
        attr = { 'title' => "Recording of " + s['title'],
                 'contributors' => r['speakers'] + (r['captions'] || []),
                 'content' => "A #{r['length']} long recording is now available." }
        obj = objectify(attr, url, s.path)
        bucket << [DateTime.parse(r['date'].to_s), 'recordings', obj, tags]
      end
    end
  end

  bucket += news.map do |n|
    [n.date.to_datetime, 'news', n, ['news'] + n.data.fetch('tags', [])]
  end

  bucket += faqs.map do |n|
    tag = Gtn::PublicationTimes.clean_path(n.path).split('/')[1]
    [Gtn::PublicationTimes.obtain_time(n.path).to_datetime, 'faqs', n, ['faqs', tag]]
  end

  bucket += pathways.map do |n|
    tags = ['learning-pathway'] + (n['tags'] || [])
    [Gtn::PublicationTimes.obtain_time(n.path).to_datetime, 'learning-pathways', n, tags]
  end

  bucket += workflows.map do |n|
    tag = Gtn::PublicationTimes.clean_path(n).split('/')[1]
    wf_data = JSON.parse(File.read(n))

    attrs = {
      'title' => wf_data['name'],
      'description' => wf_data['annotation'],
      'tags' => wf_data['tags'],
      'contributors' => wf_data.fetch('creator', []).map do |c|
        matched = site.data['contributors'].select do |_k, v|
          v.fetch('orcid', 'does-not-exist') == c.fetch('identifier', '').gsub('https://orcid.org/', '')
        end.first
        if matched
          matched[0]
        else
          c['name']
        end
      end
    }

    # These aren't truly stable. I'm not sure what to do about that.
    obj = objectify(attrs, '/' + n.gsub(/\.ga$/, '.html'), n)
    # obj = objectify(attrs, '/' + n.path[0..n.path.rindex('/')], n)
    [Gtn::PublicationTimes.obtain_time(n).to_datetime, 'workflows', obj, ['workflows', tag] + obj['tags']]
  end

  # Remove symlinks from bucket.
  bucket = bucket.reject do |_date, _type, page, _tags|
    File.symlink?(page.path) ||
      File.symlink?(File.dirname(page.path)) ||
      File.symlink?(File.dirname(File.dirname(page.path)))
  end

  bucket += site.data['contributors'].map do |k, v|
    a = { 'title' => "@#{k}", 'content' => "GTN Contributions from #{k}" }
    obj = objectify(a, "/hall-of-fame/#{k}/", k)
    [DateTime.parse("#{v['joined']}-01T12:00:00"), 'contributors', obj, ['contributor']]
  end

  bucket += site.data['grants'].map do |k, v|
    a = { 'title' => "@#{k}", 'content' => "GTN Contributions from #{k}" }
    obj = objectify(a, "/hall-of-fame/#{k}/", k)
    # TODO: backdate grants, organisations
    if v['joined']
      [DateTime.parse("#{v['joined']}-01T12:00:00"), 'grants', obj, ['grant']]
    end
  end.compact

  bucket += site.data['organisations'].map do |k, v|
    a = { 'title' => "@#{k}", 'content' => "GTN Contributions from #{k}" }
    obj = objectify(a, "/hall-of-fame/#{k}/", k)
    if v['joined']
      [DateTime.parse("#{v['joined']}-01T12:00:00"), 'organisations', obj, ['organisation']]
    end
  end.compact

  bucket
    .reject { |x| x[0] > DateTime.now } # Remove future-dated materials
    .reject { |x| x[2]['draft'] == true } # Remove drafts
    .sort_by { |x| x[0] } # Date-sorted, not strictly necessary since will be grouped.
    .reverse
end
# File bin/gtn.rb, line 7
def automagic_loading(f)
  # Remove our documentation
  f.reject! { |k, v| k == 'description' and v.is_a?(String) }
  f.reject! { |k| k == '_examples' }

  # Auto-replace CONTRIBUTORS in enums.
  f.each do |k, v|
    if v.is_a?(Hash)
      automagic_loading(v)
    elsif v.is_a?(Array)
      if k == 'enum'
        repl = []
        # If one of the elements in this array is CONTRIBUTORS, replace it with the same named variable
        repl << CONTRIBUTORS.keys if v.find { |x| x == 'CONTRIBUTORS' }
        repl << GRANTS.keys if v.find { |x| x == 'GRANTS' }
        repl << ORGANISATIONS.keys if v.find { |x| x == 'ORGANISATIONS' }
        v.replace repl.flatten if repl.length.positive?
      end
      v.flatten.each { |x| automagic_loading(x) if x.is_a?(Hash) }
    end
  end
  f
end
# File bin/news.rb, line 187
def build_news(data, filter: nil, updates: true, only_news: false)
  infix = filter.nil? ? '' : titleize(filter)
  output = "# GTN #{infix} News for #{NOW.strftime('%b %d')}"
  newsworthy = false

  if filter.nil?
    output += format_news(data[:added][:news])
    newsworthy |= format_news(data[:added][:news]).length.positive?
  end

  if only_news
    return [output, newsworthy]
  end

  o = format_events(
    data[:added][:events].select { |n| filter.nil? || safe_load_yaml(n[:path]).fetch('tags', []).include?(filter) }
  )
  output += o
  newsworthy |= o.length.positive?

  o = format_tutorials(
    data[:added][:tutorials].select { |n| filter.nil? || n[:path] =~ %r{topics/#{filter}} },
    data[:modified][:tutorials].select { |n| filter.nil? || n[:path] =~ %r{topics/#{filter}} },
    updates: updates
  )
  output += o
  newsworthy |= o.length.positive?

  o = format_tutorials(
    data[:added][:slides].select { |n| filter.nil? || n[:path] =~ %r{topics/#{filter}} },
    data[:modified][:slides].select { |n| filter.nil? || n[:path] =~ %r{topics/#{filter}} },
    kind: 'slides', updates: updates
  )
  output += o
  newsworthy |= o.length.positive?

  if filter.nil? && data[:contributors].length.positive?
    newsworthy = true
    output += "\n\n## #{data[:contributors].length} new contributors!\n\n"
    output += data[:contributors].map { |c| linkify("@#{c}", "hall-of-fame/#{c}") }.join("\n").gsub(/^/, '- ')
  end

  if filter.nil? && data[:organisations].length.positive?
    newsworthy = true
    output += "\n\n## #{data[:organisations].length} new organisations!\n\n"
    output += data[:organisations].map { |c| linkify("@#{c}", "hall-of-fame/#{c}") }.join("\n").gsub(/^/, '- ')
  end

  if filter.nil? && data[:grants].length.positive?
    newsworthy = true
    output += "\n\n## #{data[:grants].length} new grants!\n\n"
    output += data[:grants].map { |c| linkify("@#{c}", "hall-of-fame/#{c}") }.join("\n").gsub(/^/, '- ')
  end

  [output, newsworthy]
end
# File bin/ari-synthesize.rb, line 60
def call_engine(engine, line, mp3, voice, lang, neural)
  if engine == 'aws'
    awseng = if neural
               'neural'
             else
               'standard'
             end

    # Synthesize
    args = ['aws', 'polly', 'synthesize-speech', '--engine', awseng, '--language-code', lang,
            '--voice-id', voice, '--output-format', 'mp3', '--text', line, mp3]
    _, stderr, err = Open3.capture3(*args)
    if err.exited? && err.exitstatus.positive?
      puts "ERROR: #{stderr}"
      puts "ERROR: #{err}"
      exit 1
    end
  elsif engine == 'mozilla'
    raw = Tempfile.new('synth-raw')
    _, stderr, err = Open3.capture3('curl', '--silent', '-G', '--output', raw.path,
                                    "http://localhost:5002/api/tts?text=#{CGI.escape(line)}")
    if err.exited? && err.exitstatus.positive?
      puts "ERROR: #{stderr}"
      exit 1
    end

    _, stderr, err = Open3.capture3('ffmpeg', '-loglevel', 'error', '-i', raw.path, '-y', mp3)
    if err.exited? && err.exitstatus.positive?
      puts "ERROR: #{stderr}"
      exit 1
    end
  end
end
# File bin/check-indent.rb, line 5
def check_indent(file)
  doc = Nokogiri::HTML(File.open(file))
  # Find all <pre> tags
  # <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>
  ec = false
  doc.css('div.language-plaintext.highlighter-rouge div.highlight pre.highlight code').each do |pre|
    # Get the text content of the <pre> tag
    content = pre.text
    # Split the content by newlines
    lines = content.split("\n")
    # If all lines look like URLs:
    if lines.all? { |line| line =~ %r{://} }
      # If any are space indented
      lines.each do |line|
        if line =~ /^\s+/
          puts "#{file}: Indentation error: #{line}"
          ec = true
        end
      end
    end
  end
  ec
end
# File _plugins/util.rb, line 9
def collapse_event_date_pretty(event)
  s = event['date_start']
  e = if event['date_end'].nil?
        s
      else
        event['date_end']
      end

  # want dates like "Mar 22-25, 2024" or "Mar 22-May 1, 2024"
  dash = ' – ' # thin space, en dash, thin space

  if s.year == e.year
    if s.month == e.month
      if s.day == e.day
        "#{s.strftime('%B')} #{s.day}, #{s.year}"
      else
        "#{s.strftime('%B')} #{s.day}#{dash}#{e.day}, #{s.year}"
      end
    else
      "#{s.strftime('%B')} #{s.day}#{dash}#{e.strftime('%B')} #{e.day}, #{s.year}"
    end
  else
    "#{s.strftime('%B')} #{s.day}, #{s.year}#{dash}#{e.strftime('%B')} #{e.day}, #{e.year}"
  end
end
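A quick sketch of the expected output; the event hash is hypothetical, and the function only needs 'date_start'/'date_end' values that respond to year/month/day/strftime:

  require 'date'

  event = { 'date_start' => Date.new(2024, 3, 22), 'date_end' => Date.new(2024, 3, 25) }
  collapse_event_date_pretty(event)
  # => "March 22 – 25, 2024" (with thin spaces around the en dash)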
# File bin/ari-synthesize.rb, line 48
def correct(uncorrected_line)
  # First we try and catch the things we can directly replace (esp usegalaxy.*)
  line = uncorrected_line.strip.split.map do |w|
    translate(w)
  end.join(' ')

  # Now we do more fancy replacements
  line.strip.split(/([ ‘’,'".:;!`()])/).reject(&:empty?).compact.map do |w|
    translate(w)
  end.join
end
# File bin/update-data-library, line 139
def data_library_for_tutorial(path)
  File.join(File.dirname(path), 'data-library.yaml')
end
Get the list of toolcats
# File bin/fetch-categories.rb, line 8
def fetch_toolcats(server)
  uri = URI.parse(server.to_s)
  request = Net::HTTP::Get.new(uri)
  req_options = {
    use_ssl: uri.scheme == 'https',
  }
  response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
    http.request(request)
  end

  begin
    JSON.parse(response.body)
  rescue StandardError
    {}
  end
end
# File bin/workflows-fetch.rb, line 39
def fetch_workflowhub
  projects = JSON.parse(request('https://workflowhub.eu/projects').body)
  project_mapping = projects['data'].to_h { |p| [p['id'], p['attributes']['title']] }

  response = request('https://workflowhub.eu/workflows?filter[workflow_type]=galaxy')
  data = JSON.parse(response.body)
  if !data['links']['next'].nil?
    puts 'ERROR: Cannot yet handle multiple pages'
    exit 42
  end
  puts "INFO: Fetching #{data['data'].length} workflows from WorkflowHub"

  data['data'].map.with_index do |w, _i|
    # {"id"=>"14", "type"=>"workflows", "attributes"=>{"title"=>"Cheminformatics - Docking"}, "links"=>{"self"=>"/workflows/14"}}
    wf_info = JSON.parse(request("https://workflowhub.eu#{w['links']['self']}").body)
    creator_list = []

    creator0 = wf_info['data']['attributes']['creators'][0]
    if creator0.nil?
      # Other creators
      other = wf_info['data']['attributes']['other_creators']
      if !other.nil? && other.length.positive?
        creator_list.push(wf_info['data']['attributes']['other_creators'].split(',').map(&:strip))
      end
    else
      # Primary
      creator_list.push("#{creator0['given_name']} #{creator0['family_name']}")
    end

    # Projects
    wf_info['data']['relationships']['projects']['data'].each do |p|
      creator_list.push(project_mapping[p['id']])
    end
    creator_list = creator_list.flatten.compact.uniq

    begin
      r = {
        'name' => wf_info['data']['attributes']['title'],
        'owner' => creator_list.join(', '),
        'number_of_steps' => wf_info['data']['attributes']['internals']['steps'].length,
        'server' => 'https://workflowhub.eu',
        'id' => wf_info['data']['id'],
        'tags' => wf_info['data']['attributes']['tags'].map { |t| t.gsub(/^name:/, '') },
        'update_time' => wf_info['data']['attributes']['updated_at'],
      }
    rescue StandardError
      r = nil
    end
    r
  end.compact
end
Get the list of workflows
# File bin/workflows-fetch.rb, line 21
def fetch_workflows(server)
  begin
    response = request("#{server}/api/workflows/")
  rescue StandardError
    puts "ERROR: Failed to fetch workflows from #{server}"
    return []
  end

  begin
    JSON.parse(response.body).map do |w|
      w['server'] = server
      w
    end
  rescue StandardError
    []
  end
end
# File bin/news.rb, line 66
def filterSlides(x)
  x =~ %r{topics/.*/tutorials/.*/slides.*\.html}
end
new news
new slidevideos
new contributors: Done
new tutorials: Done
new slides: Done
# File bin/news.rb, line 62
def filterTutorials(x)
  x =~ %r{topics/.*/tutorials/.*/tutorial.*\.md}
end
# File bin/ari-synthesize.rb, line 94
def find_duration(mp3)
  stdout, = Open3.capture2('ffprobe', '-loglevel', 'error', '-show_format', '-show_streams',
                           '-print_format', 'json', '-i', mp3)
  data = JSON.parse(stdout)
  data['format']['duration'].to_f
end
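Usage is straightforward; the path below is hypothetical, and ffprobe must be on the PATH:

  find_duration('videos/intro.mp3')
  # => 12.43  (duration in seconds, as reported by ffprobe)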
# File bin/news.rb, line 96
def fixEvents(n)
  # news/_posts/2021-11-10-api.html => news/2021/11/10/api.html
  meta = safe_load_yaml(n[:path])
  n[:md] += " (#{collapse_event_date_pretty(meta)})"
  n
end
# File bin/news.rb, line 91
def fixNews(n)
  # news/_posts/2021-11-10-api.html => news/2021/11/10/api.html
  n[:md].gsub(%r{news/_posts/(....)-(..)-(..)-(.*.html)}, 'news/\1/\2/\3/\4')
end
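For example (hash contents hypothetical):

  fixNews({ md: 'news/_posts/2021-11-10-api.html' })
  # => "news/2021/11/10/api.html"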
# File _plugins/feeds.rb, line 377
def format_contents(xml, site, parts, title, group_by: 'day')
  # output += '<div xmlns="http://www.w3.org/1999/xhtml">'
end
# File bin/news.rb, line 160
def format_events(events)
  output = ''
  if events.length.positive?
    output += "\n\n## 📆 New Events!\n\n"
    output += events.join("\n").gsub(/^/, '- ')
  end
  output
end
# File bin/news.rb, line 151
def format_news(news)
  output = ''
  if news.length.positive?
    output += "\n\n## Big News!\n\n"
    output += news.join("\n").gsub(/^/, '- ')
  end
  output
end
# File bin/news.rb, line 169
def format_tutorials(added, modified, kind: 'tutorials', updates: true)
  output = ''
  count = added.length
  count += modified.length if updates
  output += "\n\n## #{count} #{kind}!" if count.positive?

  if added.length.positive?
    output += "\n\nNew #{kind}:\n\n"
    output += added.map { |n| n[:md] }.join("\n").gsub(/^/, '- ')
  end

  if updates && modified.length.positive?
    output += "\n\nUpdated #{kind}:\n\n"
    output += modified.map { |n| n[:md] }.join("\n").gsub(/^/, '- ')
  end
  output
end
# File _plugins/feeds.rb, line 645
def generate_event_feeds(site)
  events = site.pages.select { |x| x['layout'] == 'event' || x['layout'] == 'event-external' }
  feed_path = File.join(site.dest, 'events', 'feed.xml')
  Jekyll.logger.info '[GTN/Feeds] Generating event feed'

  # Pre-filtering.
  updated = events.map { |x| Gtn::PublicationTimes.obtain_time(x.path) }.max
  events = events
           .reject { |x| x.data.fetch('draft', '').to_s == 'true' }
           .reject { |x| x.data['event_over'] == true } # Remove past events, prunes our feed nicely.
           .sort_by { |page| Gtn::PublicationTimes.obtain_time(page.path) }
           .reverse

  if !events.empty?
    Jekyll.logger.debug "Found #{events.length} events"
  end

  builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
    # Set stylesheet
    xml.feed(xmlns: 'http://www.w3.org/2005/Atom') do
      # Set generator also needs a URI attribute
      xml.generator('Jekyll', uri: 'https://jekyllrb.com/')
      xml.link(href: "#{site.config['url']}#{site.baseurl}/events/feed.xml", rel: 'self')
      xml.link(href: "#{site.config['url']}#{site.baseurl}/events/", rel: 'alternate')
      xml.updated(updated.to_datetime.rfc3339)
      xml.id("#{site.config['url']}#{site.baseurl}/events/feed.xml")
      xml.title('Events')
      xml.subtitle('Events in the Inter-Galactic Network')
      xml.logo("#{site.config['url']}#{site.baseurl}/assets/images/GTN-60px.png")

      events.each do |page|
        xml.entry do
          pdate = collapse_event_date_pretty(page.data)
          xml.title("[#{pdate}] #{page.data['title']}")
          link = "#{site.config['url']}#{site.baseurl}#{page.url}"
          xml.link(href: link)
          # Our links are stable
          xml.id(link)

          # This is a feed of only NEW tutorials, so we only include publication times.
          xml.published(Gtn::PublicationTimes.obtain_time(page.path).to_datetime.rfc3339)
          xml.updated(Gtn::PublicationTimes.obtain_time(page.path).to_datetime.rfc3339)

          # TODO: find a better solution maybe with namespaces?
          # xml.category(term: "starts:#{page.data['date_start'].to_datetime.rfc3339}")
          # xml.category(term: "ends:#{(page.data['date_end'] || page.data['date_start']).to_datetime.rfc3339}")
          # xml.category(term: "days:#{page.data['duration']}")

          # xml.path(page.path)
          xml.category(term: "new #{page['layout']}")
          # xml.content(page.content, type: "html")
          xml.summary(page.data['description'])

          if page.data['location'] && page.data['location']['geo']
            lat = page.data['location']['geo']['lat']
            lon = page.data['location']['geo']['lon']
            xml.georss('point', "#{lat} #{lon}")
          end

          Gtn::Contributors.get_organisers(page.data).each do |c|
            xml.author do
              xml.name(Gtn::Contributors.fetch_name(site, c, warn: false))
              xml.uri("#{site.config['url']}#{site.baseurl}/hall-of-fame/#{c}/")
              if page.data['contact_email']
                xml.email(page.data['contact_email'])
              end
            end
          end

          Gtn::Contributors.get_instructors(page.data).each do |c|
            xml.contributor do
              xml.name(Gtn::Contributors.fetch_name(site, c, warn: false))
              xml.uri("#{site.config['url']}#{site.baseurl}/hall-of-fame/#{c}/")
            end
          end
        end
      end
    end
  end

  serialise(site, feed_path, builder)
end
Our old-style matrix bot posts
# File _plugins/feeds.rb, line 523
def generate_matrix_feed(site, mats, group_by: 'day', filter_by: nil)
  # new materials (tut + sli)
  # new grants/contributors/orgs
  # new news posts(?)
  filter_title = nil
  if !filter_by.nil?
    mats = mats.select { |x| x[3].include?(filter_by) }
    filter_title = filter_by.gsub('-', ' ').capitalize
  end

  case group_by
  when 'day'
    # Reject anything that is today
    mats = mats.reject { |x| x[0].strftime('%Y-%m-%d') == Date.today.strftime('%Y-%m-%d') }
  when 'week'
    mats = mats.reject { |x| x[0].strftime('%Y-%W') == Date.today.strftime('%Y-%W') }
  when 'month'
    mats = mats.reject { |x| x[0].strftime('%Y-%m') == Date.today.strftime('%Y-%m') }
  end

  bucket = group_bucket_by(mats, group_by: group_by)
  lookup = { 'day' => 'Daily', 'week' => 'Weekly', 'month' => 'Monthly' }

  parts = [filter_by || 'matrix', group_by || 'all']
  path = "feeds/#{parts.join('-')}.xml"
  feed_path = File.join(site.dest, path)
  Jekyll.logger.info '[GTN/Feeds] Generating matrix feed'
  dir = File.dirname(feed_path)
  FileUtils.mkdir_p(dir) unless File.directory?(dir)

  # Group by days
  builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
    # Set stylesheet
    xml.feed(xmlns: 'http://www.w3.org/2005/Atom') do
      # Set generator also needs a URI attribute
      xml.generator('Jekyll', uri: 'https://jekyllrb.com/')
      xml.link(href: "#{site.config['url']}#{site.baseurl}/#{path}", rel: 'self')
      xml.link(href: "#{site.config['url']}#{site.baseurl}/", rel: 'alternate')
      # convert '2024-01-01' to date
      xml.updated(DateTime.now.rfc3339)
      xml.id("#{site.config['url']}#{site.baseurl}/#{path}")
      title_parts = [filter_title, "#{lookup[group_by]} Updates"].compact
      xml.title(title_parts.join(' — '))
      xml.subtitle('The latest events, tutorials, slides, blog posts, FAQs, workflows, learning paths, ' \
                   'recordings, and contributors in the GTN.')
      xml.logo("#{site.config['url']}#{site.baseurl}/assets/images/GTN-60px.png")

      bucket.each do |date, parts|
        xml.entry do
          case group_by
          when 'day'
            title = "#{date.strftime('%B %d, %Y')}"
          when 'week'
            title = "#{date.strftime('W%W, %Y')}"
          when 'month'
            title = "#{date.strftime('%B %Y')}"
          end
          xml.title(title)
          # Our IDs should be stable
          xml.id("#{site.config['url']}#{site.baseurl}/#{group_by}/#{date.strftime('%Y-%m-%d')}")
          # This is a feed of only NEW tutorials, so we only include publication times.
          xml.published(parts.map { |x| x[0] }.min.to_datetime.rfc3339)
          xml.updated(parts.map { |x| x[0] }.max.to_datetime.rfc3339)
          # xml.category(term: "new #{type}")

          xml.content(type: 'xhtml') do
            xml.div(xmlns: 'http://www.w3.org/1999/xhtml') do
              # xml.h4 title
              parts.group_by { |x| x[1] }.sort_by { |x| PRIO[x[0]] }.each do |type, items|
                xml.h4 "#{ICON_FOR[type]} #{type.gsub(/-/, ' ').capitalize}" if items.length.positive?
                xml.ul do
                  items.each do |date, _type, page, _tags|
                    xml.li do
                      if page.is_a?(String)
                        href = track("#{site.config['url']}#{site.config['baseurl']}/hall-of-fame/#{page}/")
                        text = "@#{page}"
                      else
                        text = page.data['title']
                        href = track("#{site.config['url']}#{site.config['baseurl']}#{page.url}")
                      end

                      if group_by != 'day'
                        text += " (#{date.strftime('%B %d, %Y')})"
                      end
                      xml.a(text, href: href)
                    end
                  end
                end
              end

              if group_by != 'day'
                xml.small do
                  xml.span 'Powered by '
                  xml.a('GTN RSS Feeds', href: 'https://training.galaxyproject.org/training-material/news/2024/06/04/gtn-standards-rss.html')
                end
              end
            end
          end

          xml.author do
            xml.name('GTN')
            xml.uri("#{site.config['url']}#{site.baseurl}/hall-of-fame/")
            xml.email('galaxytrainingnetwork@gmail.com')
          end
        end
      end
    end
  end

  serialise(site, feed_path, builder)
end
# File _plugins/feeds.rb, line 383
def generate_matrix_feed_itemized(site, mats, group_by: 'day', filter_by: nil)
  filter_title = nil
  if !filter_by.nil?
    mats = mats.select { |x| x[3].include?(filter_by) }
    filter_title = filter_by.gsub('-', ' ').capitalize
  end

  case group_by
  when 'day'
    # Reject anything that is today
    mats = mats.reject { |x| x[0].strftime('%Y-%m-%d') == Date.today.strftime('%Y-%m-%d') }
  when 'week'
    mats = mats.reject { |x| x[0].strftime('%Y-%W') == Date.today.strftime('%Y-%W') }
  when 'month'
    mats = mats.reject { |x| x[0].strftime('%Y-%m') == Date.today.strftime('%Y-%m') }
  end

  bucket = group_bucket_by(mats, group_by: group_by)
  lookup = { 'day' => 'Daily', 'week' => 'Weekly', 'month' => 'Monthly', nil => 'All' }

  parts = [filter_by || 'matrix', group_by || 'all']
  path = "feeds/#{parts.join('-')}.i.xml"
  feed_path = File.join(site.dest, path)
  Jekyll.logger.info '[GTN/Feeds] Generating matrix/i feed'
  dir = File.dirname(feed_path)
  FileUtils.mkdir_p(dir) unless File.directory?(dir)

  # Group by days
  builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
    # Set stylesheet
    xml.feed(xmlns: 'http://www.w3.org/2005/Atom') do
      # Set generator also needs a URI attribute
      xml.generator('Jekyll', uri: 'https://jekyllrb.com/')
      xml.link(href: "#{site.config['url']}#{site.baseurl}/#{path}", rel: 'self')
      xml.link(href: "#{site.config['url']}#{site.baseurl}/", rel: 'alternate')
      # convert '2024-01-01' to date
      xml.updated(DateTime.now.rfc3339)
      xml.id("#{site.config['url']}#{site.baseurl}/#{path}")
      title_parts = ['GTN', filter_title, lookup[group_by], 'Updates'].compact
      # title used for slack's 'bot name', so should be something useful.
      xml.title(title_parts.join(' '))
      xml.subtitle('The latest events, tutorials, slides, blog posts, FAQs, workflows, and contributors in the GTN.')
      xml.logo("#{site.config['url']}#{site.baseurl}/assets/images/GTN-60px.png")

      bucket.each do |bucket_date, parts|
        parts.group_by { |x| x[1] }.sort_by { |x| PRIO[x[0]] }.each do |type, items|
          if items.length.positive?
            items.each do |date, type, page, tags|
              # Entry per-item.
              xml.entry do
                # This is a feed of only NEW tutorials, so we only include publication times.
                if group_by.nil?
                  xml.published(bucket_date.rfc3339)
                  xml.updated(bucket_date.rfc3339)
                else
                  xml.published(bucket_date.to_euro_lunch.rfc3339)
                  xml.updated(bucket_date.to_euro_lunch.rfc3339)
                end

                href = "#{site.config['url']}#{site.config['baseurl']}#{page.url}"
                xml.id(href)
                xml.link(href: track(href))

                tags.uniq.each do |tag|
                  xml.category(term: tag)
                end
                xml.category(term: "new #{page['layout']}")

                if page.data.key?('description')
                  xml.summary(page.data['description'])
                else
                  md = page.content[0..page.content.index("\n")].strip
                  html = markdownify(site, md)
                  text = Nokogiri::HTML(html).text
                  xml.summary(text)
                end

                prefix = type.gsub(/s$/, '').gsub(/-/, ' ').capitalize.gsub(/Faq/, 'FAQ').gsub(/New$/, 'Post')
                title = "#{ICON_FOR[type]} New #{prefix}: #{page.data['title']}"
                xml.title(title)

                had_authors = false
                Gtn::Contributors.get_authors(page.data).each do |c|
                  xml.author do
                    had_authors = true
                    xml.name(Gtn::Contributors.fetch_name(site, c, warn: false))
                    if c !~ / /
                      xml.uri("#{site.config['url']}#{site.baseurl}/hall-of-fame/#{c}/")
                    end
                  end
                end

                if !had_authors
                  xml.author do
                    xml.name('GTN')
                    xml.uri("#{site.config['url']}#{site.baseurl}/hall-of-fame/")
                    xml.email('galaxytrainingnetwork@gmail.com')
                  end
                end

                Gtn::Contributors.get_non_authors(page.data).each do |c|
                  xml.contributor do
                    xml.name(Gtn::Contributors.fetch_name(site, c, warn: false))
                    if c !~ / /
                      xml.uri("#{site.config['url']}#{site.baseurl}/hall-of-fame/#{c}/")
                    end
                  end
                end
              end
            end
          end
        end
      end

      xml.author do
        xml.name('GTN')
        xml.uri("#{site.config['url']}#{site.baseurl}/hall-of-fame/")
        xml.email('galaxytrainingnetwork@gmail.com')
      end
    end
  end

  serialise(site, feed_path, builder)
end
# File _plugins/feeds.rb, line 115
def generate_opml(site, groups)
  builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
    # Set stylesheet
    xml.opml(version: '2.0') do
      xml.head do
        xml.title('Galaxy Training Network')
        xml.dateCreated(DateTime.now.rfc3339)
        xml.dateModified(DateTime.now.rfc3339)
        xml.ownerEmail('galaxytrainingnetwork@gmail.com')
      end
      xml.body do
        groups.each do |group, items|
          xml.outline(text: group) do
            items.each do |item|
              xml.outline(text: item[:title], type: 'rss', version: 'RSS', xmlUrl: item[:url], htmlUrl: item[:url])
            end
          end
        end
      end
    end
  end

  opml_path = File.join(site.dest, 'feeds', 'gtn.opml')
  finalised = Nokogiri::XML builder.to_xml
  File.write(opml_path, finalised.to_xml)
end
# File _plugins/feeds.rb, line 211
def generate_tag_topic_feeds(_site)
  # Any new materials in a topic with the equivalent tag
  # Any new materials tagged with that tag
  # Any news by tag
  ''
end
# File _plugins/feeds.rb, line 142
def generate_topic_feeds(site, topic, bucket)
  mats = bucket.select { |x| x[3].include?(topic) }
  feed_path = File.join(site.dest, 'topics', topic, 'feed.xml')
  Jekyll.logger.info "[GTN/Feeds] Generating feed for #{topic} (#{mats.length} items)"

  builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
    # Set stylesheet
    xml.feed(xmlns: 'http://www.w3.org/2005/Atom') do
      # Set generator also needs a URI attribute
      xml.generator('Jekyll', uri: 'https://jekyllrb.com/')
      xml.link(href: "#{site.config['url']}#{site.baseurl}/topics/#{topic}/feed.xml", rel: 'self')
      xml.link(rel: 'alternate', href: "#{site.config['url']}#{site.baseurl}/topics/#{topic}/")
      xml.updated(mats.first[0].rfc3339)
      xml.id("#{site.config['url']}#{site.baseurl}/topics/#{topic}/feed.xml")
      topic_title = site.data[topic]['title']
      xml.title("#{topic_title}")
      xml.subtitle("Recently added tutorials, slides, FAQs, and events in the #{topic} topic")
      xml.logo("#{site.config['url']}#{site.baseurl}/assets/images/GTN-60px.png")

      mats.each do |time, group, page, tags|
        xml.entry do
          xml.title(ICON_FOR[group] + " " + page.data['title'])
          link = "#{site.config['url']}#{site.baseurl}#{page.url}"
          xml.link(href: link)
          # Our links are (mostly) stable
          xml.id(link)
          # This is a feed of only NEW tutorials, so we only include publication times.
          # xml.published(Gtn::PublicationTimes.obtain_time(page.path).to_datetime.rfc3339)
          xml.updated(time.rfc3339)

          tags.uniq.each do |tag|
            xml.category(term: tag)
          end

          if page.data.key? 'description'
            xml.summary(page.data['description'])
          else
            md = page.content[0..page.content.index("\n")].strip
            html = markdownify(site, md)
            text = Nokogiri::HTML(html).text
            xml.summary(text)
          end

          Gtn::Contributors.get_authors(page.data).each do |c|
            xml.author do
              xml.name(Gtn::Contributors.fetch_name(site, c, warn: false))
              if c !~ / /
                xml.uri("#{site.config['url']}#{site.baseurl}/hall-of-fame/#{c}/")
              end
            end
          end

          Gtn::Contributors.get_non_authors(page.data).each do |c|
            xml.contributor do
              xml.name(Gtn::Contributors.fetch_name(site, c, warn: false))
              if c !~ / /
                xml.uri("#{site.config['url']}#{site.baseurl}/hall-of-fame/#{c}/")
              end
            end
          end
        end
      end
    end
  end

  serialise(site, feed_path, builder)
end
# File bin/collect-gh.rb, line 15
def gh_cli_pr_info(num, fields)
  JSON.parse(`gh pr view #{num} --json #{fields.join(',')}`)
end
# File bin/collect-gh.rb, line 10
def gh_cli_pr_list(count: 3)
  d = JSON.parse(`gh pr list --search 'sort:updated-desc is:merged' --limit #{count} --json number`)
  d.map { |pr| pr['number'] }
end
# File _plugins/feeds.rb, line 347
def group_bucket_by(bucket, group_by: 'day')
  case group_by
  when 'day'
    bucket
      .group_by { |x| x[0].strftime('%Y-%m-%d') }
      .to_h { |_k, v| [v.map { |x| x[0] }.min, v] }
  when 'week'
    bucket
      .group_by { |x| x[0].strftime('%Y-%W') }
      .to_h { |_k, v| [v.map { |x| x[0] }.min, v] }
  when 'month'
    bucket
      .group_by { |x| x[0].strftime('%Y-%m') }
      .to_h { |_k, v| [v.map { |x| x[0] }.min, v] }
  else
    # Pretend this is an h
    # bucket
    #   .map { |x| [x[0], x] }
    #   .to_h
    bucket
      .map.with_index { |x, i| [x[0] + i / 100000000.0, [x]] }
      .to_h
    # We add an artificial separator in the range of milliseconds to each file,
    # should never grow more than 1s, likely, to ensure each of these are
    # individual items. This is kludge-y, yeah, but downstream processing wants
    # to group_by in places, and we don't want to trigger it collapsing there
    # too.
  end
end
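A small sketch of the day-grouping behaviour; the dates and the page placeholder below are hypothetical:

  require 'date'

  page = Object.new # stands in for a Jekyll page
  mats = [
    [DateTime.parse('2024-03-01T09:00'), 'tutorials', page, ['tag']],
    [DateTime.parse('2024-03-01T15:00'), 'news',      page, ['tag']],
  ]
  group_bucket_by(mats, group_by: 'day')
  # => one key (the earliest DateTime on 2024-03-01) mapping to both entries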
# File bin/news.rb, line 103
def isDraft(n)
  meta = safe_load_yaml(n)
  meta.fetch('draft', false)
end
# File _plugins/notebook-jupyter.rb, line 6
def json_boxify(h, page)
  h['cells'].each do |cell|
    # If it's a list, loop
    if cell['source'].is_a? Array
      cell['source'].each do |line|
        # rubocop:disable Layout/LineLength
        line.gsub!(%r{<(?<boxclass>#{Gtn::Boxify.box_classes})-title( ?(?<noprefix>noprefix))>(?<title>.*?)</\s*\k<boxclass>-title\s*>}) do
          # rubocop:enable Layout/LineLength
          m = Regexp.last_match
          box_type = m[:boxclass]
          title = m[:title]
          noprefix = m[:noprefix]
          _, box = Gtn::Boxify.generate_title(box_type, title, lang, page.path, noprefix: noprefix)
          box
        end
      end
    else
      # rubocop:disable Layout/LineLength
      cell['source'].gsub!(%r{<(?<boxclass>#{Gtn::Boxify.box_classes})-title(?<noprefix>\s+noprefix)?>(?<title>.*?)</\s*\k<boxclass>-title\s*>}) do
        # rubocop:enable Layout/LineLength
        m = Regexp.last_match
        box_type = m[:boxclass]
        title = m[:title]
        noprefix = m[:noprefix]
        _, box = Gtn::Boxify.generate_title(box_type, title, 'en', page.path, noprefix: noprefix)
        box
      end
    end
  end
  h
end
# File _plugins/notebook-jupyter.rb, line 120
def jupyter_post_write(site)
  site.config['__rendered_notebook_cache'].each do |_path, info|
    # Create if missing
    FileUtils.mkdir_p(info['dir'])
    # Write it out!
    File.write(info['path1'], info['content1'])
    File.write(info['path2'], info['content2'])
  end
end
# File _plugins/notebook-jupyter.rb, line 38
def jupyter_pre_render(site)
  Jekyll.logger.info '[GTN/Notebooks] Rendering'

  site.config['__rendered_notebook_cache'] = {}

  # For every tutorial with the 'notebook' key in the page data
  site.pages.select { |page| GTNNotebooks.notebook_filter(page.data) }.each do |page|
    # We get the path to the tutorial source
    dir = File.dirname(File.join('.', page.url))
    fn = File.join('.', page.url).sub(/html$/, 'md')
    notebook_language = page.data['notebook'].fetch('language', 'python')

    # Tag our source page
    page.data['tags'] = page.data['tags'] || []
    page.data['tags'].push('jupyter-notebook')

    Jekyll.logger.info "[GTN/Notebooks] Rendering #{notebook_language} #{fn}"
    last_modified = Gtn::ModificationTimes.obtain_time(page.path)
    notebook = GTNNotebooks.render_jupyter_notebook(page.data, page.content, page.url, last_modified,
                                                    notebook_language, site, dir)

    topic_id = dir.split('/')[-3]
    tutorial_id = dir.split('/')[-1]

    with_solutions = notebook.clone
    with_solutions['cells'] = with_solutions['cells'].map do |cell|
      if cell.fetch('cell_type') == 'markdown' && (cell['source'].is_a? String)
        m = cell['source'].match(/<blockquote class="solution"[^>]*>/)
        if m
          cell['source'].gsub!(/<blockquote class="solution"[^>]*>/,
                               '<br/><details style="border: 2px solid #B8C3EA; margin: 1em 0.2em;' \
                               'padding: 0.5em; cursor: pointer;"><summary>👁 View solution</summary>')
          idx = m.begin(0)
          q = cell['source'][0..idx]
          w = cell['source'][idx + 1..]
          e = w.index('</blockquote>')
          r = "#{w[0..e - 1]}</details>#{w[e + 13..]}"
          cell['source'] = q + r
        end
      end
      cell
    end

    # Write it out!
    ipynb_dir = File.join(site.dest, dir)
    ipynb_path = File.join(ipynb_dir, "#{topic_id}-#{tutorial_id}.ipynb")

    # page2 = PageWithoutAFile.new(site, '', dir, "#{topic_id}-#{tutorial_id}.ipynb")
    # page2.content = JSON.pretty_generate(with_solutions)
    # page2.data['layout'] = nil
    # page2.data['citation_target'] = 'jupyter'
    # site.pages << page2

    # Create a no-solutions version:
    no_solutions = notebook.clone
    no_solutions['cells'] = no_solutions['cells'].map do |cell|
      if cell.fetch('cell_type') == 'markdown' && (cell['source'].is_a? String)
        cell['source'].gsub!(/<blockquote class="solution"[^>]*>/,
                             '<blockquote class="solution" style="display:none">')
      end
      cell
    end

    ipynb_path2 = File.join(ipynb_dir, "#{topic_id}-#{tutorial_id}-course.ipynb")

    # page2 = PageWithoutAFile.new(site, '', dir, "#{topic_id}-#{tutorial_id}-course.ipynb")
    # page2.content = JSON.pretty_generate(no_solutions)
    # page2.data['layout'] = nil
    # page2.data['citation_target'] = 'jupyter'
    # site.pages << page2

    site.config['__rendered_notebook_cache'][page.path] = {
      'dir' => ipynb_dir,
      'path1' => ipynb_path,
      'content1' => JSON.pretty_generate(json_boxify(with_solutions, page)),
      'path2' => ipynb_path2,
      'content2' => JSON.pretty_generate(json_boxify(no_solutions, page)),
    }
  end
end
# File bin/news.rb, line 81
def linkify(text, path)
  "[#{text.gsub('|', '-')}](https://training.galaxyproject.org/training-material/#{path}?utm_source=matrix&utm_medium=newsbot&utm_campaign=matrix-news)"
end
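For example (the title and path are hypothetical):

  linkify('My Tutorial', 'topics/example/tutorials/demo/tutorial.html')
  # => "[My Tutorial](https://training.galaxyproject.org/training-material/topics/example/tutorials/demo/tutorial.html?utm_source=matrix&utm_medium=newsbot&utm_campaign=matrix-news)"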
# File bin/prepare_feedback.rb, line 55
def lookup_topic(topic_id)
  @cache ||= {}
  @cache.fetch(topic_id) do |key|
    file = "metadata/#{topic_id}.yaml"
    return nil unless File.exist? file

    data = safe_load_yaml(file)
    @cache[key] = data['title']
  end
end
# File bin/prepare_feedback.rb, line 34
def lookup_tuto(topic_id, tuto_id)
  @cache ||= {}
  @cache.fetch("#{topic_id}/#{tuto_id}") do |key|
    @cache[key] = nil
    file = "topics/#{topic_id}/tutorials/#{tuto_id}/tutorial.md"
    if File.exist? file
      data = safe_load_yaml(file)
      @cache[key] = data['title']
    else
      file = "topics/#{topic_id}/tutorials/#{tuto_id}/slides.html"
      if File.exist? file
        data = safe_load_yaml(file)
        @cache[key] = data['title']
      else
        puts "No file for #{topic_id}/#{tuto_id}"
      end
    end
  end
end
Map a contributor ID to a JSON object which includes links to their profile page and API endpoint.

Params:
- site (Jekyll::Site): the site object
- c (String): contributor ID

Returns: Hash of contributor information
# File _plugins/api.rb, line 53
def mapContributor(site, c)
  contrib_type, contrib = Gtn::Contributors.fetch(site, c)
  x = contrib
      .merge({
               'id' => c,
               'url' => site.config['url'] + site.config['baseurl'] + "/api/#{contrib_type}s/#{c}.json",
               'page' => site.config['url'] + site.config['baseurl'] + "/hall-of-fame/#{c}/",
             })
  visitAndMarkdownify(site, x)
end
Use Jekyll's Markdown converter to convert text to HTML.

Params:
- site (Jekyll::Site): the site object
- text (String): the Markdown text to convert

Returns: String of rendered HTML
# File _plugins/api.rb, line 19
def markdownify(site, text)
  site.find_converter_instance(
    Jekyll::Converters::Markdown
  ).convert(text.to_s)
end
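Inside a plugin this behaves roughly as follows (the input string is hypothetical; exact output depends on the configured Markdown converter):

  markdownify(site, 'Some **bold** text')
  # => "<p>Some <strong>bold</strong> text</p>\n"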
# File _plugins/feeds.rb, line 40
def objectify(attrs, url, path)
  obj = attrs.clone
  obj['__path'] = path
  obj['__url'] = url

  def obj.data
    self
  end

  def obj.path
    self['__path']
  end

  def obj.url
    self['__url']
  end

  def obj.content
    self.fetch('content', 'NO CONTENT AVAILABLE')
  end

  obj
end
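The result quacks like a Jekyll page, which is what the feed code expects. For example (values hypothetical):

  obj = objectify({ 'title' => 'Example' }, '/example/', 'example.md')
  obj.data['title'] # => "Example"
  obj.url           # => "/example/"
  obj.content       # => "NO CONTENT AVAILABLE"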
# File bin/news.rb, line 70
def onlyEnabled(x)
  tutorial_meta = safe_load_yaml(x)
  tutorial_enabled = tutorial_meta.fetch('enable', true)

  topic = x.split('/')[1]
  topic_meta = safe_load_yaml("metadata/#{topic}.yaml")
  topic_enabled = topic_meta.fetch('enable', true)

  tutorial_enabled and topic_enabled
end
# File bin/ari-synthesize.rb, line 157
def parseOptions
  options = {}
  OptionParser.new do |opts|
    opts.banner = 'Usage: ari-synthesize.rb [options]'

    options[:neural] = true
    options[:voice] = 'Amy'
    options[:lang] = 'en-GB'

    opts.on('--aws', 'Use AWS Polly') do |v|
      options[:aws] = v
    end

    opts.on('--mozilla', 'Use MozillaTTS') do |v|
      options[:mozilla] = v
    end

    opts.on('--non-neural', '[AWS] Non-neural voice') do |_v|
      options[:neural] = false
    end

    opts.on('--voice=VOICE', '[AWS] Voice ID') do |n|
      options[:voice] = n
    end

    opts.on('--lang=LANG', '[AWS] Language code') do |n|
      options[:lang] = n
    end

    opts.on('-fFILE', '--file=FILE', 'File containing line of text to speak') do |n|
      options[:file] = n
    end

    opts.on('-oFILE', '--output=FILE', 'Location to save the file in (defaults to auto-generated location)') do |n|
      options[:output] = n
    end

    opts.on('-v', '--[no-]verbose', 'Run verbosely') do |v|
      options[:verbose] = v
    end
  end.parse!

  if !(options[:aws] || options[:mozilla])
    puts 'ERROR: You must use aws or mozilla'
    exit 1
  end

  if !(options[:file])
    puts 'ERROR: You must provide a file with a single sentence to speak'
    exit 1
  end

  sentence = File.read(options[:file]).chomp

  if options[:aws]
    engine = 'aws'
  elsif options[:mozilla]
    engine = 'mozilla'
  end

  [sentence, engine, options]
end
# File bin/update-data-library, line 149
def parse_metadata(path)
  parts = path.to_s.split('/')
  topic_id = parts[1]
  topic_metadata = YAML.load_file(File.join('metadata', "#{topic_id}.yaml"))
  tutorial_metadata = YAML.load_file(path)

  topic = {
    'name' => topic_metadata['title'],
    'description' => topic_metadata['summary']
  }
  tutorial = { 'name' => tutorial_metadata['title'] }

  [topic, tutorial]
end
# File bin/update-data-library, line 143
def parse_tutorial_for_zenodo_link(path)
  parse_zenodo_id_formats(YAML.load_file(path)['zenodo_link'])
rescue StandardError
  nil
end
# File bin/update-data-library, line 30
def parse_zenodo_id_formats(link)
  # https://zenodo.org/record/1234567
  # https://zenodo.org/record/1234567#.X0X0X0X0X0X
  # doi:10.5281/zenodo.1234567
  # doi:10.5281/zenodo.1234567#.X0X0X0X0X0X
  # 10.5281/zenodo.1234567
  # 10.5281/zenodo.1234567#.X0X0X0X0X0X
  # https://doi.org/10.5281/zenodo.3732358
  # https://doi.org/10.5281/zenodo.3732358#.X0X0X0X0X0X
  link = link.split('#')[0]
  if link.match(/doi:/) || link.match(/^10.5281/) || link.match(/doi.org/)
    link.split('.')[-1]
  else
    link.split('/')[-1]
  end
end
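Each of the documented formats reduces to the bare record ID:

  parse_zenodo_id_formats('https://zenodo.org/record/1234567#.X0X0X0X0X0X') # => "1234567"
  parse_zenodo_id_formats('doi:10.5281/zenodo.1234567')                     # => "1234567"
  parse_zenodo_id_formats('https://doi.org/10.5281/zenodo.3732358')         # => "3732358"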
# File bin/news.rb, line 85
def printableMaterial(path)
  d = safe_load_yaml(path)
  { md: linkify(d['title'], path.gsub(/.md/, '.html')),
    path: path }
end
# File bin/geocode.rb, line 8
def request(url)
  uri = URI.parse(url)
  request = Net::HTTP::Get.new(uri)
  request['Accept'] = 'application/json'
  request['User-Agent'] = 'GTN-geocode/1.0 (+https://github.com/galaxyproject/training-material)'
  req_options = {
    use_ssl: uri.scheme == 'https',
  }
  Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
    http.request(request)
  end
end
# File _plugins/util.rb, line 3
def safe_load_yaml(file)
  YAML.load_file(file)
rescue StandardError
  YAML.load_file(file, permitted_classes: [Date])
end
# File _plugins/util.rb, line 33
def safe_site_config(site, key, default)
  if !site.config.nil? && site.config.key?(key)
    site.config[key]
  else
    default
  end
end
# File bin/news.rb, line 246
def send_news(output, options, channel: 'default')
  if options[:postToMatrix]
    # rubocop:disable Style/GlobalVars
    homeserver = $rooms[channel]
    # rubocop:enable Style/GlobalVars
    pp homeserver

    data = {
      'msgtype' => 'm.notice',
      'body' => output,
      'format' => 'org.matrix.custom.html',
      'formatted_body' => Kramdown::Document.new(output).to_html,
    }

    headers = {
      'Authorization' => "Bearer #{ENV.fetch('MATRIX_ACCESS_TOKEN', nil)}",
      'Content-type' => 'application/json',
    }

    uri_send_message = URI("#{homeserver[:server]}/_matrix/client/r0/rooms/#{homeserver[:room]}/send/m.room.message")
    req = Net::HTTP.post(uri_send_message, JSON.generate(data), headers)
    # Parse response
    resp = JSON.parse(req.body)
    puts resp

    if resp['errcode'] == 'M_FORBIDDEN' && (resp['error'] =~ /not in room/)
      puts 'Not in room, attempting to join'
      # Join room
      # POST /_matrix/client/v3/join/{roomIdOrAlias}
      uri_join = URI("#{homeserver[:server]}/_matrix/client/v3/join/#{homeserver[:room]}")
      req = Net::HTTP.post(uri_join, JSON.generate({}), headers)
      # Parse response
      resp = JSON.parse(req.body)

      # Now we're safe to re-try
      if resp.key?('room_id')
        req = Net::HTTP.post(uri_send_message, JSON.generate(data), headers)
        # Parse response
        resp = JSON.parse(req.body)
        puts resp
      end
    end
  else
    puts '===== NEWS START ====='
    puts output
    puts '===== NEWS END ====='
  end
end
# File _plugins/feeds.rb, line 66
def serialise(site, feed_path, builder)
  # The builder won't let you add a processing instruction, so we have to
  # serialise it to a string and then parse it again. Ridiculous.
  if !Dir.exist?(File.dirname(feed_path))
    FileUtils.mkdir_p(File.dirname(feed_path))
  end

  # First the 'default' with explanatory portion
  finalised = Nokogiri::XML builder.to_xml
  pi = Nokogiri::XML::ProcessingInstruction.new(
    finalised, 'xml-stylesheet',
    %(type="text/xml" href="#{site.config['url']}#{site.baseurl}/feed.xslt.xml")
  )
  finalised.root.add_previous_sibling pi
  File.write(feed_path, finalised.to_xml)

  # Then the widget-compatible version with a more minimal representation:
  finalised = Nokogiri::XML builder.to_xml
  pi = Nokogiri::XML::ProcessingInstruction.new(
    finalised, 'xml-stylesheet',
    %(type="text/xml" href="#{site.config['url']}#{site.baseurl}/feed-widget.xslt.xml")
  )
  finalised.root.add_previous_sibling pi
  File.write(feed_path.gsub(/\.xml$/, '.w.xml'), finalised.to_xml)

  # Write out HTML version since Safari doesn't support XSLT on XML. Rip.
  File.write(feed_path.gsub(/\.xml$/, '.w.html'), FEED_WIDGET_XSLT.transform(finalised))
end
# File _plugins/gtn/supported.rb, line 215
def short_id(tool_id)
  if tool_id.count('/') > 4
    tool_id.split('/')[0..-2].join('/')
  else
    tool_id
  end
end
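This strips the trailing version from fully-qualified ToolShed IDs while leaving short built-in IDs alone; the IDs below are illustrative:

  short_id('toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.11')
  # => "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc"
  short_id('Grep1')
  # => "Grep1"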
# File bin/validate-contributors.rb, line 29
def show_errors(file, errs)
  # If we had no errors, validated successfully
  if errs.empty?
    puts "\e[38;5;40m#{file} validated successfully\e[m"
    0
  else
    # Otherwise, print errors and exit non-zero
    puts "\e[48;5;09m#{file} has errors\e[m"
    errs.each { |x| puts "  #{x}" }
    1
  end
end
# File bin/ari-prep-script.rb, line 56
def split_sentence(sentence, timing)
  res = sentence.split
  chunk_size = (res.length.to_f / (res.length.to_f / 20).ceil).ceil
  chunks = res.each_slice(chunk_size).to_a.length
  res.each_slice(chunk_size).with_index.map do |chunk, idx|
    t0 = timing * (idx / chunks.to_f)
    tf = timing * ((1 + idx) / chunks.to_f)
    [chunk, t0, tf]
  end
end
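The sentence is cut into roughly 20-word chunks and the timing is divided evenly among them. A short sketch with hypothetical values:

  split_sentence('one two three four five six', 3.0)
  # => [[["one", "two", "three", "four", "five", "six"], 0.0, 3.0]]
  # A 25-word sentence would instead yield two chunks covering 0.0–1.5 and 1.5–3.0.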
# File bin/ari-synthesize.rb, line 101
def synthesize(uncorrected_line, engine, voice: 'Amy', lang: 'en-GB', neural: true, output: nil)
  line = correct(uncorrected_line)
  digest = Digest::MD5.hexdigest line
  if output.nil?
    mp3 = File.join(GTN_CACHE, "#{engine}-#{digest}-#{voice}.mp3")
    json = File.join(GTN_CACHE, "#{engine}-#{digest}-#{voice}.json")
    if File.file?(mp3)
      duration = JSON.parse(File.read(json))['end']
      return mp3, json, duration.to_f
    end
  else
    mp3 = output
    json = "#{output}.json"
    if File.file?(output)
      return mp3, json, 0.0 # TODO
    end
  end

  # Call our engine
  call_engine(engine, line, mp3, voice, lang, neural)
  duration = find_duration(mp3)

  if line.length < 200 && duration > 27
    # Helena managed to find a specific bad string which, when fed to Mozilla's
    # TTS would generate
    #
    # In:  Some important terms you should know.
    # Out: Some important terms you should know know know know know know know know ...
    #
    # So we put in a check that the duration hasn't done something crazy, and
    # if it is add something to the end which seems to short-circuit that
    # error.
    #
    # I've reported this upstream but the response was not useful, apparently
    # this is an "expected failure mode".
    #
    # https://github.com/synesthesiam/docker-mozillatts/issues/9
    # https://discourse.mozilla.org/t/sentences-which-trigger-an-endless-loop/72261/8
    warn 'Strange: line was too long'
    call_engine(engine, "#{line}.", mp3, voice, lang, neural)
    duration = find_duration(mp3)
  end

  if line.length < 200 && duration > 27
    # Or maybe they just wrote a super long sentence. Or maybe we need to update the cutoff time.
    warn "ERROR: #{duration} of line is bad: #{line}"
  end

  # Now collect metadata for JSON
  json_handle = File.open(json, 'w')
  json_handle.write(JSON.generate({ time: 0, type: 'sentence', start: 0, end: duration, value: line }))
  json_handle.close

  [mp3, json, duration]
end
# File bin/workflow-test.rb, line 16
def test_workflow(workflow_file, galaxy_id)
  directory = File.dirname(workflow_file)
  workflow_base = File.basename(workflow_file, '.ga')
  workflow_output_json = File.join(directory, "#{workflow_base}.#{galaxy_id}.json")
  galaxy_url = GALAXIES[galaxy_id][:url]
  galaxy_user_key = GALAXIES[galaxy_id][:key]
  cmd = [
    'planemo', '--verbose', 'test',
    '--galaxy_url', galaxy_url,
    '--galaxy_user_key', galaxy_user_key,
    '--no_shed_install',
    '--engine', 'external_galaxy',
    '--polling_backoff', '1',
    '--simultaneous_uploads',
    '--test_output_json', workflow_output_json,
    workflow_file
  ]
  p cmd.join(' ')

  Open3.popen3(*cmd) do |_stdin, stdout, stderr, wait_thr|
    exit_status = wait_thr.value # Process::Status object returned
    File.write("#{directory}/#{workflow_base}.#{galaxy_id}.log", stdout.read)
    File.write("#{directory}/#{workflow_base}.#{galaxy_id}.err", stderr.read)
    puts "#{workflow_file} => #{exit_status} (#{stderr})"
  end
end
Intro slides.

Fast: editly --json editly.json5  126,23s user 5,62s system 126% cpu 1:44,08 total
Slow: editly --json editly.json5  902,71s user 69,27s system 326% cpu 4:57,54 total
# File bin/ari-prep-script.rb, line 39
def timefmt(t, fmt)
  seconds = t % (24 * 3600)
  hours = seconds.to_i / 3600
  seconds = seconds % 3600
  minutes = seconds.to_i / 60
  seconds = seconds % 60
  (seconds, ms) = seconds.divmod(1)
  ms = 1000 * ms

  if fmt == 'vtt'
    format('%<h>02d:%<m>02d:%<s>02d.%<ms>03d', { h: hours, m: minutes, s: seconds, ms: ms })
  else
    format('%<h>02d:%<m>02d:%<s>02d,%<ms>03d', { h: hours, m: minutes, s: seconds, ms: ms })
  end
end
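WebVTT uses a dot before the milliseconds; any other fmt value yields the SRT-style comma:

  timefmt(3725.5, 'vtt') # => "01:02:05.500"
  timefmt(3725.5, 'srt') # => "01:02:05,500"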
# File bin/news.rb, line 147
def titleize(t)
  t.gsub('-', ' ').gsub(/\w+/, &:capitalize)
end
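For example:

  titleize('single-cell') # => "Single Cell"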
# File _plugins/feeds.rb, line 30
def track(url)
  if url =~ /utm_source/
    url
  elsif url.include? '#'
    url.gsub(/#/, TRACKING + '#')
  else
    url + TRACKING
  end
end
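Assuming TRACKING (listed in the constants above) is a '?utm_…' query-string suffix, the behaviour is roughly:

  track('https://example.org/page')           # appends TRACKING
  track('https://example.org/page#section')   # splices TRACKING in before the '#'
  track('https://example.org/?utm_source=x')  # already tracked, returned unchanged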
# File bin/ari-synthesize.rb, line 24
def translate(word)
  return word if /^\s+$/.match(word)
  return word if PUNCTUATION.find_index(word)
  return WORD_MAP[word] if WORD_MAP.key?(word)

  m = /([^A-Za-z0-9]*)([A-Za-z0-9]+)([^A-Za-z0-9]*)(.*)/.match(word)
  if !m
    puts "Error: #{word}"
    return word
  end

  fixed = if m[2]
            WORD_MAP.fetch(m[2].downcase, m[2])
          else
            m[2]
          end

  # puts "#{m} ⇒ #{m[1] + fixed + m[3]}"
  m[1] + fixed + m[3] + m[4]
end
# File _plugins/util.rb, line 56
def unsafe_slugify(text)
  text.gsub(%r{["'\\/;:,.!@#$%^&*()]}, '').gsub(/\s/, '-').gsub(/-+/, '-')
end
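For example:

  unsafe_slugify("Hello, World!") # => "Hello-World"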
# File bin/update-data-library, line 55
def update_data_library(path, topic, tutorial, zenodo_record)
  zenodo_id = zenodo_record['id'].to_s
  zenodo_files = zenodo_record.fetch('files', []).map do |f|
    official_extension = f['type']
    link = f['links']['self'].sub(%r{/content$}, '')
    unofficial_extension = link.split('.')[-2..].join('.')
    ext = @SHARED_DATATYPES.fetch(unofficial_extension, nil) || @SHARED_DATATYPES.fetch(official_extension, nil)

    # Example:
    # https://zenodo.org/api/records/10870107/files/elem_s2_r1.fq.gz/content
    # Needs to be
    # https://zenodo.org/record/10870107/files/elem_s2_r1.fq.gz
    real_link = f['links']['self'].sub(%r{/content$}, '').sub('/api/records/', '/record/')

    # puts "Processing file: #{f['type']} #{f['links']['self']} => #{ext}"
    # puts "#{unofficial_extension} => #{@SHARED_DATATYPES.fetch(unofficial_extension, nil)}"
    # puts "#{official_extension} => #{@SHARED_DATATYPES.fetch(official_extension, nil)}"
    warn "Unknown file type: #{f['type']}. Consider adding this to shared/datatypes.yaml" if ext.nil?

    {
      'url' => real_link,
      'src' => 'url',
      'ext' => ext || f['type'],
      'info' => "https://doi.org/10.5281/zenodo.#{zenodo_id}",
      # 'checksum' => f['checksum'],
      # 'key' => f['key'],
    }
  end

  library = {
    'destination' => {
      'type' => 'library',
      'name' => 'GTN - Material',
      'description' => 'Galaxy Training Network Material',
      'synopsis' => 'Galaxy Training Network Material. See https://training.galaxyproject.org',
    },
    'items' => [
      'name' => topic['name'],
      'description' => topic['description'],
      'items' => [
        'name' => tutorial['name'],
        'items' => [
          'name' => "DOI: 10.5281/zenodo.#{zenodo_id}",
          'description' => 'latest',
          'items' => zenodo_files
        ]
      ]
    ]
  }

  data_library_path = data_library_for_tutorial(path)
  puts "Writing data library to #{data_library_path}"
  File.write(data_library_path, library.to_yaml)
end
# File bin/update-data-library, line 48
def update_tutorial(path, zenodo_id)
  # Edit the yaml header of the markdown file to update the ID
  contents = File.read(path)
  contents.gsub!(/^zenodo_link:.*/, "zenodo_link: 'https://zenodo.org/record/#{zenodo_id}'")
  File.write(path, contents)
end
# File _plugins/util.rb, line 42
def url_prefix(site)
  if !site.config.nil? && site.config.key?('url')
    "#{site.config['url']}#{site.config['baseurl']}"
  else
    'http://localhost:4000/training-material/'
  end
end
# File bin/validate-contributors.rb, line 22
def validate_document(document, validator)
  errors = validator.validate(document)
  return errors if errors && !errors.empty?

  []
end
Recursively visit a hash and markdownify all strings inside.

Params:
- site (Jekyll::Site): the site object
- f (Hash): the hash to visit

Returns: Hash with all strings markdownified
# File _plugins/api.rb, line 32
def visitAndMarkdownify(site, f)
  case f
  when Array
    f.map! { |x| visitAndMarkdownify(site, x) }
  when Hash
    f = f.transform_values do |v|
      visitAndMarkdownify(site, v)
    end
  when String
    f = markdownify(site, f).strip.gsub(/<p>/, '').gsub(%r{</p>}, '')
  end
  f
end
# File bin/update-data-library, line 109
def write_data_library(path, topic, tutorial, tutorial_zenodo_id, force)
  # Fetch the zenodo record
  zenodo_record = request("https://zenodo.org/api/records/#{tutorial_zenodo_id}")
  new_zenodo_id = zenodo_record['id'].to_s # If it's redirected we'll get a different ID here
  puts "Discovered zenodo link: #{new_zenodo_id}"

  # So load the data library, and check what's written there
  datalibrary_zenodo_id = if File.exist?(data_library_for_tutorial(path))
                            YAML.load_file(data_library_for_tutorial(path))['zenodo_id'].to_s
                          end

  # If the ID has changed we should update the tutorial as well:
  if new_zenodo_id == tutorial_zenodo_id && !force
    warn 'Tutorial is up to date'
  else
    warn "Zenodo ID has changed from #{tutorial_zenodo_id} to #{new_zenodo_id}, updating the tutorial"
    update_tutorial(path, new_zenodo_id)
  end

  # If the new ID doesn't match the data library, then we should update it.
  if new_zenodo_id == datalibrary_zenodo_id && !force
    warn 'Data library is up to date'
  else
    warn "Zenodo ID has changed from #{datalibrary_zenodo_id} to #{new_zenodo_id}, updating the data library"
    update_data_library(path, topic, tutorial, zenodo_record)
  end
end