#!/usr/bin/env python
import argparse
import csv
import io
import json
import logging
import multiprocessing.pool
import os

import requests

def fetch_and_extract_individual_server_tools(server):
    """Query a single Galaxy server's tools API and return its (name, tool data) pair."""
    # request the tools via the API
    url = '%s/api/tools?in_panel=False' % server['url'].rstrip('/')
    try:
        response = requests.get(url, timeout=20)
    except requests.exceptions.RequestException:
        print(server['name'] + " Connection error or timeout (20s)")
        return

    # check status
    if response.status_code != requests.codes.ok:
        print(server['name'] + " Bad status (%s)" % response.status_code)
        return

    # check content: an HTML page means the server did not answer with JSON
    if "</html>" in response.text:
        print(server['name'] + " No JSON output")
        return

    # extract the list of tools in this instance
    try:
        response_json = response.json()
    except ValueError:  # covers JSONDecodeError across requests versions
        print(server['name'] + " Invalid JSON")
        return

    found_tools = set()
    for tool in response_json:
        found_tools.add(tool['id'])

    return server['name'], {
        'url': server['url'],
        'tools': list(found_tools)
    }
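
# Illustration (not produced by the script): the tuple returned above would look
# roughly like the following, with a hypothetical server and an abridged tool list:
#   ('Example Galaxy', {
#       'url': 'https://galaxy.example.org',
#       'tools': ['upload1', 'toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/0.7.17.4'],
#   })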

def extract_public_galaxy_servers_tools():
    """Extract the tools from the public Galaxy servers using their API"""
    server_tools = {}

    to_process = []
    serverlist = requests.get('https://galaxyproject.org/use/feed.json').json()
    for server in serverlist:
        # We intentionally drop all usegalaxy.eu subdomains. They're all the
        # same as the top level domain and just pollute the supported instances
        # list.
        if '.usegalaxy.eu' in server['url']:
            continue
        # Apparently the French do it too
        if '.usegalaxy.fr' in server['url']:
            continue
        # The Aussies will soon
        if '.usegalaxy.org.au' in server['url']:
            continue
        # No test servers permitted
        if 'test.' in server['url']:
            continue

        to_process.append({'name': server['title'], 'url': server['url']})

    # query the servers in parallel, 20 at a time
    pool = multiprocessing.pool.ThreadPool(processes=20)
    processed = pool.map(fetch_and_extract_individual_server_tools, to_process, chunksize=1)
    pool.close()

    # keep only the servers that answered with a usable tool list
    for server_data in processed:
        if server_data:
            server_tools[server_data[0]] = server_data[1]

    return server_tools
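
# Illustration (not produced by the script): the mapping returned by
# extract_public_galaxy_servers_tools() has this shape, keyed by server name
# (hypothetical server and abridged tool list):
#   {'Example Galaxy': {'url': 'https://galaxy.example.org', 'tools': ['upload1', ...]}}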

if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Extract which public Galaxy servers can run specific tools')
    args = parser.parse_args()

    server_tools = extract_public_galaxy_servers_tools()

    # Reverse the mapping: instead of server -> tools, build tool -> servers,
    # referring to each server by its index in the 'servers' list
    tool_servers = {
        'servers': [],
        'tools': {},
    }
    for idx, (server_name, server_data) in enumerate(server_tools.items()):
        tool_servers['servers'].append({
            'url': server_data['url'],
            'name': server_name,
        })
        for tool in server_data['tools']:
            if tool.count('/') > 4:
                # ToolShed tools: everything after the fifth slash is the version
                tool_id = '/'.join(tool.split('/')[:5])
                tool_version = '/'.join(tool.split('/')[5:])
                if tool_id not in tool_servers['tools']:
                    tool_servers['tools'][tool_id] = {}
                if tool_version not in tool_servers['tools'][tool_id]:
                    tool_servers['tools'][tool_id][tool_version] = []

                tool_servers['tools'][tool_id][tool_version].append(idx)
            else:
                # built-in tools have no version component; store them under '_'
                if tool not in tool_servers['tools']:
                    tool_servers['tools'][tool] = {"_": []}
                tool_servers['tools'][tool]['_'].append(idx)

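    # Illustration of the structure written below (hypothetical ids and indices):
    #   {
    #     "servers": [{"url": "https://galaxy.example.org", "name": "Example Galaxy"}],
    #     "tools": {
    #       "toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa": {"0.7.17.4": [0]},
    #       "upload1": {"_": [0]}
    #     }
    #   }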
    with open('metadata/public-server-tools.json', 'w') as handle:
        json.dump(tool_servers, handle)