#!/usr/bin/env ruby -KU
require 'rexml/document'        # for SVG and bibxml acrobatics
require 'yaml'
require 'shellwords'
require 'fileutils'
require_relative '../lib/kramdown-rfc/rfc8792'
require_relative '../lib/kramdown-rfc/rexml-all-text.rb'

# Sanitize +s+ so it can be used as a single file or directory name.
#
# * every "/" becomes ":" (so the result cannot span path components)
# * a leading run of "-" and "." characters is replaced by the same number
#   of "_" (so the result cannot start with a dot or look like an option)
# * every remaining run of characters other than "-", ".", ":" and word
#   characters collapses to a single "-"
#
# Returns a new String; the argument is left untouched, so frozen strings
# and strings owned by other objects can be passed safely.
def clean(s)
  s.gsub(/\//, ":")
   .gsub(/\A([-.]+)/) { "_" * $1.size }
   .gsub(/[^-.:\w]+/, "-")
end

$seen_slugs = {}

# Build a slug for the text +s+ that has not been handed out before.
#
# The text is NFKD-normalized, stripped of everything that is not ASCII,
# downcased, reduced to word characters, whitespace, "-" and "/", trimmed,
# and finally has each whitespace, "-" and "/" character turned into "-".
#
# The slug starts out as the first (32 - delta) characters of that string.
# While the candidate is already registered in $seen_slugs it is lengthened
# one character at a time, up to min(string length, 40 - delta) characters.
# If it is still taken, "-2", "-3", ... is appended until a free slug is
# found.  The chosen slug is recorded in $seen_slugs (mapped to the full
# cleaned string) and returned.
#
# Raises ArgumentError once the numeric suffix would reach 1000.
def slugify_like_xml2rfc(s, delta = 0)
  ascii = s.unicode_normalize(:nfkd).force_encoding(Encoding::ASCII).scrub('')
  base = ascii.downcase.gsub(/[^-\w\s\/]/, '').strip.gsub(/[-\s\/]/, '-')

  len = 32 - delta
  limit = [base.size, 40 - delta].min
  candidate = base[0...len]
  while $seen_slugs[candidate] && len < limit
    len += 1
    candidate = base[0...len]
  end

  if $seen_slugs[candidate]
    stem = base[0...len]
    counter = 2
    loop do
      candidate = "#{stem}-#{counter}"
      break unless $seen_slugs[candidate]
      counter += 1
      raise ArgumentError, "Can't build distinguishable slug for '#{base}'" if counter == 1000
    end
  end

  $seen_slugs[candidate] = base
  candidate
end

# Command-line state, filled in by the OptionParser callbacks below.
target = nil                    # one of `targets`, or :stdout when -x is given
dir = nil                       # value of --dir; defaulted after parsing
out_type = out_name = nil       # the TYPE and NAME halves of --extract=TYPE/NAME
unfold = true                   # value of --[no-]unfold
targets = [:list, :files, :zip, :yaml]
require 'optparse'
begin
  op = OptionParser.new do |opts|
    opts.banner = "Usage: kramdown-rfc-extract-sourcecode [options] document.xml"
    opts.on("-tFMT", "--to=FMT", targets, "Target format #{targets.map(&:to_s)}") do |v|
      target = v
    end
    opts.on("-dDIR", "--dir=DIR", "Target directory (default: sourcecode)") do |v|
      dir = v
    end
    opts.on("-xTYPE/NAME", "--extract=TYPE/NAME", "Extract single item to stdout") do |v|
      target = :stdout
      out_type, out_name = v.split("/", 2)
    end
    opts.on("-f", "--[no-]unfold", "RFC8792-unfold (default: yes)") do |v|
      unfold = v
    end
  end
  op.parse!
rescue StandardError => e
  # Deliberately not `rescue Exception`: OptionParser implements --help and
  # --version by calling exit, and that SystemExit has to propagate so that
  # asking for help ends with status 0 instead of printing "exit" and
  # failing with status 1.  Parse errors are StandardErrors and still land
  # here.
  warn e
  exit 1
end
# Giving --dir implies writing files unless a target was chosen explicitly;
# with any other target the directory is not used, so say so.
if dir
  target ||= :files
  unless [:files, :zip].include? target
    warn "** Unused argument --dir=#{dir}"
  end
end
dir ||= "sourcecode"

# Without any target selection, just list what the document contains.
target ||= :list
# State for the extraction pass that follows.
gensym = "unnamed-000"   # advanced with succ! to mint names for unnamed items
taken = Hash.new { |by_type, type| by_type[type] = Hash.new }    # type => {name => content}
warned = Hash.new { |by_type, type| by_type[type] = Hash.new }   # type => {name => true}
filenames = ARGV.inspect # captured before parsing, used in diagnostics only

# Reports a parse failure together with (at most) the first 121 characters
# of the error's message.
cant_parse = lambda { |err|
  warn "*** Can't parse: #{filenames} (#{err.to_s[0..120]})"
}

# Parse the XML given on the command line (or stdin); `dr` is its root
# element.  Any failure is reported on stderr and ends the run with status 1.
begin
  dr = REXML::Document.new(ARGF).root
  if !dr
    warn "*** Can't parse: #{filenames}"
    exit(1)
  end
rescue Errno::ENOENT
  warn "*** Not found: #{filenames}"
  exit(1)
rescue => e
  begin
    cant_parse.call(e)
  rescue => e2
    # Building the first message failed itself; report that failure instead.
    cant_parse.call(e2)
  end
  exit(1)
end
# Walk every <sourcecode> and <artwork> element of the document and collect
# its content into taken[type][name].  Elements without a type attribute are
# skipped entirely.
REXML::XPath.each(dr, "//sourcecode|//artwork") do |x|
  if ty = x[:type]
    # An <svg> child marks the item as SVG, whatever type was declared.
    is_svg = false
    REXML::XPath.each(x, "svg") do is_svg = true end
    ty = clean(ty)
    if is_svg && ty != "svg"
      warn "** replacing SVG type '#{ty}' by type 'svg'"
      ty = "svg"
    end
    # The type doubles as the file extension; an empty type is filed under
    # "=txt" and gets the extension "txt".
    ext = ty
    if ty.empty?
      ty = "=txt"
      ext = "txt"
    end
    name = x[:name]
    name_is_from_slug = false
    if !name || name.empty?
      # No usable name attribute: derive one from the first <name> child of
      # the element's ancestors.  The block runs once per match, so the last
      # match visited determines the name.
      REXML::XPath.each(x, "ancestor::*/name[position() = 1]") do |nameel|
        # Reuse an existing slugifiedName attribute; otherwise compute one
        # from the element's text and store it back on the <name> element.
        unless sname = nameel['slugifiedName']
          alltext = nameel.all_text
          nameel.add_attribute('slugifiedName',
                               sname = clean("name-" << slugify_like_xml2rfc(alltext, 5)))
        end
        name = clean(sname.to_s.sub(/^name-/, ''))
        name_is_from_slug = true
      end
      # Still nothing: fall back to the next generated "unnamed-NNN" name.
      # dup, because gensym itself is mutated again by the next succ!.
      if !name || name.empty?
        name = gensym.succ!.dup
      end
      name = "#{name}.#{ext}"
    end
    name = clean(name)
    if taken[ty][name]
      # The name is already in use for this type.
      if name_is_from_slug || is_svg
        # Derived names and SVG items get a distinguishing suffix
        # ("-b", "-c", ...) inserted before the extension.
        # rename old entry as well if it was from slug?
        nameparts = name.split(".")
        if is_svg && nameparts[-1] != "svg"
          nameparts << "svg"
        end
        ext1 = nameparts.pop || "txt"
        suffix = "b"
        # Advance the suffix until the resulting name is unused.
        while taken[ty][name = [*nameparts[0...-1], "#{nameparts[-1]}-#{suffix}", ext1].join(".")]
          suffix.succ!
        end
        taken[ty][name] = ''
      else
        # Explicitly named items that share a name are concatenated into one
        # entry; warn once per type/name pair.
        unless warned[ty][name]
          warn "Concatenating to #{ty}/#{name}."
          warned[ty][name] = true
        end
      end
    else
      taken[ty][name] = ''
    end
    # Append the content: the serialized <svg> child(ren) if there are any,
    # otherwise the element's text run through handle_artwork_sourcecode
    # (defined outside this file; presumably the RFC 8792 unfolding
    # controlled by --[no-]unfold -- confirm in lib/kramdown-rfc/rfc8792).
    extracted = false
    REXML::XPath.each(x, "svg") do |svg|
      # According to v3.rnc, there should be only one...
      taken[ty][name] << svg.to_s
      extracted = true
    end
    unless extracted
      taken[ty][name] << handle_artwork_sourcecode(x.all_text, unfold)
    end
  end
end

# Write the collected items to disk as +dir+/TYPE/NAME.
#
# dir::   path of the directory to (re)create
# taken:: Hash mapping a type name to a Hash of file name => file content
#
# If +dir+ already exists it is first moved aside to "<dir>.bak"; when that
# name is taken as well, the backup name is advanced with String#succ
# ("<dir>.bal", ...) until a free one is found.
#
# Returns +taken+.
def make_directory_from(dir, taken)
  if File.exist?(dir)
    bak = "#{dir}.bak"
    begin
      # FileUtils.mv moves its source *into* a destination that is an
      # existing directory rather than raising EEXIST, which would bury the
      # backup inside an older one.  Pick an unused name before moving.
      bak.succ! while File.exist?(bak)
      FileUtils.mv(dir, bak)
    rescue Errno::EEXIST
      # Something claimed the name between the check and the move.
      bak.succ!
      retry
    end
  end
  FileUtils.mkdir_p(dir)
  taken.each do |dir1, files|
    subdir = "#{dir}/#{dir1}"
    FileUtils.mkdir_p(subdir)
    files.each do |fn, value|
      # File.write (unlike IO.write) never treats a leading "|" as a command.
      File.write("#{subdir}/#{fn}", value)
    end
  end
end

# Emit the collected items in the form selected on the command line.
case target
when :stdout
  # Single item to stdout (-x TYPE[/NAME]).  When NAME is omitted it is
  # inferred, which only works if the type holds exactly one item.
  dataset = taken[out_type]
  if dataset
    if !out_name
      names = dataset.keys
      if names.empty?
        warn "No sourcecodes under #{out_type}"
        exit(1)
      elsif names.size > 1
        warn "Multiple sourcecodes under #{out_type}: #{names}"
        exit(1)
      end
      out_name = names.first
    end
    data = dataset[out_name]
    if data
      puts data
      exit
    end
  end
  warn "*** Cannot find sourcecode #{out_type}/#{out_name}"
  exit(1)
when :yaml
  # Everything, contents included, as one YAML document.
  puts taken.to_yaml
when :list
  # Only the item names, grouped by type.
  listing = taken.map { |type, items| [type, items.keys] }.to_h
  puts listing.to_yaml
when :files, :zip
  make_directory_from(dir, taken)
  if target == :zip
    # Move any previous archive out of the way, then let zip(1) move the
    # freshly written directory into a new one.
    archive = "#{dir}.zip"
    if File.exist?(archive)
      backup = "#{archive}.bak"
      begin
        FileUtils.mv(archive, backup)
      rescue Errno::EEXIST        # This doesn't actually happen.  XXX
        backup.succ!
        retry
      end
    end
    command = Shellwords.join(["zip", "-mr", archive, dir])
    warn command
    system(command)
  end
end
