#!/usr/bin/env python
import argparse
import csv
import io
import json
import logging
import multiprocessing.pool
import os

import requests
def fetch_and_extract_individual_server_tools(server):
    """Fetch the IDs of the tools exposed by one Galaxy server.

    Requests ``<url>/api/tools?in_panel=False`` and collects the ``id`` of
    every tool entry in the JSON response. Every failure is reported on
    stdout and signalled by returning ``None``, so that a single broken
    server cannot abort a run that maps this function over many servers.

    Args:
        server: dict with the keys ``'name'`` and ``'url'``.

    Returns:
        A ``(name, {'url': url, 'tools': [tool_id, ...]})`` tuple on
        success (tool IDs de-duplicated and sorted), ``None`` on failure.
    """
    name = server['name']

    # request the tools via the API
    url = '%s/api/tools?in_panel=False' % server['url'].rstrip('/')
    try:
        response = requests.get(url, timeout=20)
    except requests.exceptions.Timeout:
        print(name + " Connection Timeout (20s)")
        return None
    except Exception as err:
        # Stay best-effort for DNS/TLS/refused-connection/invalid-URL errors,
        # but report them accurately and let KeyboardInterrupt/SystemExit
        # propagate (the previous bare ``except`` swallowed those too).
        print(name + " Connection Error (%s)" % type(err).__name__)
        return None

    # check status
    if response.status_code != requests.codes.ok:
        print(name + " Bad status (%s)" % response.status_code)
        return None

    # check content: an HTML page means the API is not actually reachable
    if "</html>" in response.text:
        print(name + " No JSON output")
        return None

    # extract the list of tools in this instance
    try:
        response_json = response.json()
    except ValueError:
        # json, simplejson and requests all raise ValueError subclasses here.
        print(name + " Invalid JSON")
        return None

    # Some servers answer with a JSON error object instead of a tool list;
    # indexing into that would raise and take the whole worker pool down.
    if not isinstance(response_json, list):
        print(name + " Unexpected JSON structure")
        return None

    found_tools = {
        tool['id']
        for tool in response_json
        if isinstance(tool, dict) and 'id' in tool
    }

    # Sorted so that repeated runs produce the same ordering.
    return name, {
        'url': server['url'],
        'tools': sorted(found_tools),
    }
def extract_public_galaxy_servers_tools():
    """Extract the tools from the public Galaxy servers using their API.

    Downloads the public server feed from galaxyproject.org, drops the
    servers whose URL contains one of the excluded fragments, and queries
    the remaining servers concurrently with a pool of 20 threads.

    Returns:
        dict mapping server name to ``{'url': ..., 'tools': [...]}`` for
        every server that answered successfully. Servers for which the
        per-server fetch returned a falsy result are omitted.

    Raises:
        requests.exceptions.RequestException: if the server feed itself
            cannot be downloaded (timeout, connection error, bad status).
    """
    excluded_fragments = (
        # We intentionally drop all usegalaxy.eu subdomains. They're all the
        # same as the top level domain and just pollute the supported
        # instances list.
        '.usegalaxy.eu',
        # Apparently the french do it too
        '.usegalaxy.fr',
        # The aussies will soon
        '.usegalaxy.org.au',
        # No test servers permitted
        # NOTE(review): plain substring match, so it also drops hosts such
        # as "latest.example.org" -- confirm whether that is intended.
        'test.',
    )

    # Bound the wait and fail loudly on an error status instead of hanging
    # forever or trying to parse an error page as JSON.
    feed = requests.get('https://galaxyproject.org/use/feed.json', timeout=60)
    feed.raise_for_status()
    serverlist = feed.json()

    to_process = [
        {'name': server['title'], 'url': server['url']}
        for server in serverlist
        if not any(fragment in server['url'] for fragment in excluded_fragments)
    ]

    # The context manager tears the pool down even if map() raises;
    # map() itself only returns once every server has been processed.
    with multiprocessing.pool.ThreadPool(processes=20) as pool:
        processed = pool.map(
            fetch_and_extract_individual_server_tools, to_process, chunksize=1
        )

    # Failed servers come back as None and are skipped.
    return {
        server_data[0]: server_data[1]
        for server_data in processed
        if server_data
    }
# Script entry point: query the public Galaxy servers and write a reversed
# "tool -> version -> [server index]" mapping to
# metadata/public-server-tools.json.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Extract which public Galaxy servers can run specific tools'
    )
    # No options are defined; parsing still provides --help and rejects
    # unexpected arguments.
    parser.parse_args()

    server_tools = extract_public_galaxy_servers_tools()

    # Reverse the mapping: servers are stored once in a list and referenced
    # from each tool/version entry by their index in that list.
    tool_servers = {
        'servers': [],
        'tools': {},
    }
    for idx, (server_name, server_data) in enumerate(server_tools.items()):
        tool_servers['servers'].append({
            'url': server_data['url'],
            'name': server_name,
        })

        for tool in server_data['tools']:
            if tool.count('/') > 4:
                # IDs with more than four slashes are split after the fifth
                # component: the head is the tool id, the rest the version.
                parts = tool.split('/')
                tool_id = '/'.join(parts[:5])
                tool_version = '/'.join(parts[5:])
            else:
                # Other tools carry no version in their ID; they are filed
                # under the placeholder version "_".
                tool_id = tool
                tool_version = '_'

            # setdefault also covers an un-versioned ID that was already
            # registered by a versioned tool (previously a KeyError on '_').
            versions = tool_servers['tools'].setdefault(tool_id, {})
            versions.setdefault(tool_version, []).append(idx)

    with open('metadata/public-server-tools.json', 'w') as handle:
        json.dump(tool_servers, handle)