#!/usr/bin/env perl
# author: joe.zheng

use strict;

use Getopt::Long;
use File::Basename;

my $self = basename($0);
my $version = 'v0.5';

# Write a diagnostic line to STDERR: the script name, a colon and a space,
# then every argument, terminated by a newline.
sub M {
  my @message = @_;
  # the output record separator supplies the trailing newline
  local $\ = "\n";
  print STDERR "$self: ", @message;
}

# Print the help text (synopsis, options and examples) to STDOUT, then
# terminate the program. Never returns.
sub usage {
  my $help_text = <<"END_USAGE";
Usage: $self [-h] [-r] [-t] [-v] [<args>...]
  Convert TiddlyWiki5 to Markdown or vice versa

  -r:   reversely
  -t:   self test
  -v:   show version
  -h:   help message

Examples:

  1. $self -t
    Self-test, from TW5 to Markdown

  2. $self -t -r
    Self-test reversely, Markdown to TW5

  3. echo '!! h2' | $self
    Convert '!! h2' from TW5 to Markdown

  4. $self tiddly.wiki > tiddly.md
    Convert tiddly.wiki in TW5 format to tiddly.md

  5. $self -r tiddly.md wiki.md
    Convert these two files in Markdown format to TW5, output to STDOUT

END_USAGE

  print $help_text;

  exit;
}

# main

# Parse the command line; an unknown option prints the usage text and exits.
my ($help, $reverse, $test, $show_version);
GetOptions(
  "help|h"    => \$help,
  "reverse|r" => \$reverse,
  "test|t"    => \$test,
  "version|v" => \$show_version,
) or usage();

usage() if $help;
if ($show_version) {
  print "$version\n";
  exit;
}

# announce the conversion direction on STDERR so STDOUT stays clean
M($reverse ? "Markdown -> TiddlyWiki" : "TiddlyWiki -> Markdown");

if ($test) {
  self_test($reverse);
} else {
  # no file given: convert whatever is piped in on STDIN
  unshift(@ARGV, '-') unless @ARGV;
  foreach my $f (@ARGV) {
    if ($f eq '-') {
      # three-argument open treats '-' as a literal file name, so leave the
      # real STDIN alone instead of trying to re-open it
      convert($reverse);
      next;
    }

    # convert() always reads STDIN, so point a localized STDIN at the file;
    # the previous handle comes back when this iteration's scope ends
    local *STDIN;
    open STDIN, '<', $f or die "cannot open '$f': $!\n";

    convert($reverse);
  }
}

# Convert the document on STDIN and print the result to STDOUT.
#
# Arguments:
#   $reverse  true for Markdown -> TiddlyWiki5, false (the default) for
#             TiddlyWiki5 -> Markdown
#   $ctx      optional hash reference with initial parser state (for example
#             last_line_blank); it is copied, the caller's hash is not touched
#
# The text is processed one line at a time with regular expressions, so only
# single-line constructs are recognised. Fenced code blocks (```) are passed
# through untouched in both directions; Markdown's indented code blocks are
# turned into fenced ones when converting to TiddlyWiki5.
sub convert {
  my ($reverse, $init) = @_;
  $reverse ||= 0;

  # copy the seed state; assigning the reference itself to the hash would
  # only create one bogus key named after the reference
  my %ctx = ref $init eq 'HASH' ? %$init : ();

  # one level of Markdown code indentation: four spaces or a single tab
  my $re_indent = qr/^(?:[ ]{4}|\t)/;

  # Markdown has no separate external-link syntax: targets that start with a
  # URL scheme become [[name|link]], everything else becomes [ext[name|link]]
  my $md2tw_link = sub {
    my ($name, $link) = @_;
    return $link =~ m{^(?:[^:/?#]+):} ? "[[$name|$link]]" : "[ext[$name|$link]]";
  };

  while (<STDIN>) {
    $ctx{'curr_line_blank'} = /^\s*$/ ? 1 : 0;

    # both support code block with ```, leave as is
    if (m/^```\w*$/ ... m/^```\s*$/) {
      print and next;
    }

    if ($reverse) {
      # md2tw
      # code block with indent
      if ($ctx{'in_code_block_indent'}) {
        if (/$re_indent/) {
          # strip exactly the matched indent (a tab is one character, not four)
          (my $code = $_) =~ s/$re_indent//;
          print $code;
          next;
        }
        # first line without indent stops the code block
        print "```\n";
        $ctx{'in_code_block_indent'} = 0;
      } elsif ($ctx{'last_line_blank'} && /$re_indent/) {
        # an indented line right after a blank line starts a new code block
        print "```\n";
        $ctx{'in_code_block_indent'} = 1;
        (my $code = $_) =~ s/$re_indent//;
        print $code;
        next;
      }

      # headers
      # must run before the ordered list rule, which emits leading '#'
      s|^(\s*)(#+)|$1 . '!' x length($2)|eg;
      # unordered list, good enough: two spaces of indent per nesting level
      s|^(\s*)[*+-]\s+|'*' x (length($1)/2) . '* '|eg;
      # ordered list, good enough: any item number, not just a single digit
      s|^(\s*)\d+[.)]\s+|'#' x (length($1)/2) . '# '|eg;
      # blockquotes, the same, clean up
      s/^(?:\s*)(>+)/$1/g;

      # wiki link, partially
      s|\[([^\]]+?)\](?:\[\])?(?!\()|[[$1]]|g;
      # reference link, drop reference
      # NOTE(review): the wiki link rule above already rewrites "[a][b]" into
      # "[[a]][[b]]", so this rule appears to never match - confirm
      s|\[([^\]]+?)\]\[([^\]]+)\](?!\()|[[$1]]|g;
      # url link
      # md no ext link, let us convert based on url schema
      s/(?<!\!)\[([^\]]+?)\]\(([^\)]+?)\)/$md2tw_link->($1, $2)/eg;
      # image link
      s/!\[([^\]]+?)\]\(([^\)]+?)\)/[img[$1|$2]]/g;
      s/!\[\]\(([^\)]+?)\)/[img[$1]]/g;
      # image link with reference, drop reference
      # NOTE(review): "![a][b]" has also been rewritten by the wiki link rule
      # by the time this runs, so this rule looks unreachable too - confirm
      s|!\[([^\]]+?)\]\[([^\]]+)\](?!\()|[img[$1]]|g;

      # md no underline
      s|<u>([^<]+?)</u>|__${1}__|g;
      # bold
      s/\*\*([^*]+?)\*\*/''$1''/g;
      # italic
      s|([_*])([^_*]+?)\1(?!\1)|//$2//|g;
      # md no superscript
      s|<sup>([^<]+?)</sup>|^^${1}^^|g;
      # md no subscripted
      s|<sub>([^<]+?)</sub>|,,${1},,|g;
      # strikethrough, the same
      # backticks code, the same

    } else {
      # tw2md

      # unordered list, good enough
      s/^(?:\s*)(\*+)\s*/'  ' x (length($1)-1) . '* '/eg;
      # ordered list, good enough
      s/^(?:\s*)(#+)\s*/'  ' x (length($1)-1) . '1. '/eg;
      # blockquotes, the same, clean up
      s/^(?:\s*)(>+)/$1/g;
      # headers
      # must after ordered list is handled, otherwise will double convert
      s/^(\s*)(!+)/$1 . '#' x length($2)/eg;

      # wiki link, partially
      s|\[\[([^\|\]]+?)\]\]|[$1][]|g;
      # url link
      s/\[\[([^\|\]]+?)\|([^\]]+?)\]\]/[$1]($2)/g;
      # ext link
      s/\[ext\[([^\|\]]+?)\]\]/[$1]($1)/g;
      s/\[ext\[([^\|\]]+?)\|([^\]]+?)\]\]/[$1]($2)/g;
      # image link, drop info can't supported by md
      s/\[img(?:[^\[]*?)\[([^\|\]]+?)\]\]/![]($1)/g;
      s/\[img(?:[^\[]*?)\[([^\|\]]+?)\|([^\]]+?)\]\]/![$1]($2)/g;

      # underline
      s|__([^_]+?)__|<u>$1</u>|g;
      # bold
      s|''([^']+?)''|**$1**|g;
      # italic
      s|//([^/]+?)//|_$1_|g;
      # superscript
      s|\^\^([^\^]+?)\^\^|<sup>$1</sup>|g;
      # subscripted
      s|,,([^,]+?),,|<sub>$1</sub>|g;
      # strikethrough, the same
      # backticks code, the same
    }

    print;
  } continue {
    # runs after every line, including those skipped with "next"
    $ctx{'last_line_blank'} = $ctx{'curr_line_blank'} ? 1 : 0;

    if (eof) {
      if ($ctx{'in_code_block_indent'}) {
        # close a code block that runs to the end of the input; add a newline
        # only when the last line had none, so no blank line sneaks into the
        # block
        print "\n" unless /\n\z/;
        print "```\n";
      }
    }
  }
}

# Run the converter over the fixtures embedded after __DATA__ and report,
# line by line, whether the output matches the expected text. The part before
# the "@@" separator line is TiddlyWiki5, the part after it is Markdown;
# $reverse selects which one is the input. Prints the report to STDOUT and
# terminates the program.
sub self_test {
  my ($reverse) = @_;

  M("self_test: conversion is asymmetric, failures may be false positive");

  # slurp the fixtures, then rewind DATA to where it was
  my $offset = tell DATA;
  my $fixtures;
  {
    local $/ = undef;
    $fixtures = <DATA>;
  }
  seek DATA, $offset, 0;

  my @parts = split(/^@@\s*\r?\n/m, $fixtures);
  my ($input, $expected) = $reverse ? @parts[1, 0] : @parts[0, 1];

  # convert() only talks to STDIN/STDOUT, so redirect both to in-memory
  # buffers for the duration of the call
  my $output;
  {
    local (*STDIN, *STDOUT);
    open STDIN, '<', \$input or die;
    open STDOUT, '>', \$output or die;

    convert($reverse);
  }

  my @in_lines = split /\n/, $input;
  my @got      = split /\n/, $output;
  my @want     = split /\n/, $expected;

  # lines are paired purely by position
  foreach my $idx (0 .. $#want) {
    if ($got[$idx] eq $want[$idx]) {
      print(qq/OK: $in_lines[$idx]\n -> $got[$idx]\n/);
    } else {
      print(qq/KO: $in_lines[$idx]\n -> $got[$idx]\n != $want[$idx]\n/);
    }
  }

  exit;
}

__DATA__
`backticks` for inline code
''bold'' for bold text
//italic// for italic text
__underscore__ for underscored text
^^superscript^^ for superscript text
,,subscript,, for subscripted text
~~strikethrough~~ for strikethrough text

! heading level 1
!! heading level 2
!!! heading level 3
* unordered list level 1
** unordered list level 2
*** unordered list level 3
# ordered list level 1
## ordered list level 2
### ordered list level 3
> blockquotes level 1
>> blockquotes level 2

[[TiddlyWiki]]
[[TiddlyWiki|https://tiddlywiki.com]]
[ext[joez.html]]
[ext[JoezWiki|joez.html]]
[img[https://tiddlywiki.com/favicon.ico]]
[img[An explanatory tooltip|https://tiddlywiki.com/favicon.ico]]
[img width=32 [https://tiddlywiki.com/favicon.ico]]

```
code block

    # code **should not** be changed
```

```perl
    # another code block
```
    not a code block with indent

```
this is a code block with indent
```
@@
`backticks` for inline code
**bold** for bold text
_italic_ for italic text
<u>underscore</u> for underscored text
<sup>superscript</sup> for superscript text
<sub>subscript</sub> for subscripted text
~~strikethrough~~ for strikethrough text

# heading level 1
## heading level 2
### heading level 3
* unordered list level 1
  * unordered list level 2
    * unordered list level 3
1. ordered list level 1
  1. ordered list level 2
    1. ordered list level 3
> blockquotes level 1
>> blockquotes level 2

[TiddlyWiki][]
[TiddlyWiki](https://tiddlywiki.com)
[joez.html](joez.html)
[JoezWiki](joez.html)
![](https://tiddlywiki.com/favicon.ico)
![An explanatory tooltip](https://tiddlywiki.com/favicon.ico)
![](https://tiddlywiki.com/favicon.ico)

```
code block

    # code **should not** be changed
```

```perl
    # another code block
```
    not a code block with indent

    this is a code block with indent