#!/usr/bin/env python import os import argparse import yaml import re import json import marko import re from marko.ast_renderer import ASTRenderer from marko import Markdown import sys import knittingneedles as knit

# read in a tutorial, and check the structure of it. parser = argparse.ArgumentParser(

description="Extract a 'script' from a tutorial with commits, commands, and more."

) parser.add_argument(“tutorial”, type=argparse.FileType(“r”), help=“Input tutorial”) args = parser.parse_args()

# Parse out the markdown header text = args.tutorial.readlines() meta = text text = text[text.index('—n') + 1:] text = ''.join(text) meta = ''.join(meta) meta = yaml.safe_load(meta)

# Alternatively, you can register extensions later. markdown = Markdown(renderer=ASTRenderer) data = markdown(text)

def dataKv(line):

for m in re.findall('(?P<k>data-[a-z-]*)="(?P<v>[^"]*)"', line):
    yield (m[0][5:], m[1].strip('"'))

def fixspoken(kids):

text = ""
for y in kids:
    if y['element'] != 'paragraph':
        continue

    for x in y['children']:
        if x['element'] == 'raw_text' and x['children'][0:2] != '{:':
            text += x['children']
        elif x['element'] == 'line_break':
            text += ' '
    text += ' '

text = re.sub('\s+', ' ', text)
meta = {'type': 'spoken', 'text': text.strip()}
meta.update({'data': dict(dataKv(kids[-1]['children'][-1]['children']))})
return meta

def fixcommit(a, b):

code = a['children'][0]['children']
# Remove endraw at end
code = code.strip().split('\n')
if code[-1] == '{% endraw %}':
    code = code[:-1]
code = '\n'.join(code)

meta = {'type': 'code', 'code': code}
meta.update({'data': dict(dataKv(b))})
return meta

def fixcmd(a, b):

code = a['children'][0]['children']
meta = {'type': 'cmd', 'cmd': code.strip()}
meta.update({'data': dict(dataKv(b))})
return meta

def emit(d, i=0):

# Spoken Text
if d['element'] == 'quote':
    # print('FOUND')

    # We guard access in try/except
    try:
        kid = d['children'][-1]['children'][-1]
    except:
        kid = None

    # In order to not hide errors of actual access
    if kid is not None and kid['element'] == 'raw_text':
        # print('HERE')
        lastkid = kid['children']
        # print(lastkid)
        if '.spoken' in lastkid:
            yield fixspoken(d['children'])

# Code blocks (commits)
if 'children' in d and isinstance(d['children'], list):
    for idx, x in enumerate(d['children']):
        # print(idx, x)
        if x['element'] == 'fenced_code':
            # print(x)
            if idx + 1 < len(d['children']):
                nk = d['children'][idx + 1]['children'][0]['children']
                if 'data-commit' in nk:
                    yield fixcommit(x, nk)
                elif 'data-cmd' in nk:
                    yield fixcmd(x, nk)

if 'children' in d and isinstance(d['children'], list):
    for child in d['children']:
        # print(f'emit {child}')
        yield from emit(child, i=i+1)

def reduceSteps(it):

seen = []
out = []
for step in it:
    if 'ref' in step['data']:
        ref = step['data']['ref']
        if ref not in seen:
            out.append(step)
            seen.append(ref)
        else:
            twin = [x for x in out if x['data'].get('ref', None) == ref][0]
            # the voiced bit always comes after??
            twin['text'] = step['text']
            twin['type'] = [ twin['type'], step['type'] ]
            del twin['data']['ref']
    else:
        out.append(step)
return out

DEBUG = os.environ.get('DEBUG', True) != True if DEBUG:

print(json.dumps(data))

else:

output = {'meta': {k: v for k, v in meta.items() if k in ('title', 'contributors', 'voice')}}
output['steps'] = []
for x in reduceSteps(emit(data)):
    # print(x)
    if 'commit' in x['data'] or x['type'] == 'cmd' or 'cmd' in x['type']:
        x['data']['visual'] = 'terminal'
    output['steps'].append(x)

print(yaml.dump(output))