[Cryptech-Commits] [user/sra/pelican] 01/68: Initial wiki dump and initial tools
git at cryptech.is
Mon Jul 19 22:24:40 UTC 2021
This is an automated email from the git hooks/post-receive script.
sra at hactrn.net pushed a commit to branch pelican
in repository user/sra/pelican.
commit 3e7b8b209060988ed020f0eda33b1a2f7c292be7
Author: Rob Austein <sra at hactrn.net>
AuthorDate: Mon Mar 18 20:44:57 2019 +0000
Initial wiki dump and initial tools
---
 tools/convert-and-slurp-attachments.sh |  18 ++
 tools/extract-wiki-content.xsl         | 177 +++++++++++++++++
 tools/rpki-wiki-to-markdown.py         | 341 +++++++++++++++++++++++++++++++++
 tools/trac-wiki-to-markdown.rb         |  51 +++++
 tools/trac2down.py                     |  61 ++++++
 tools/trac2md.py                       | 192 +++++++++++++++
 6 files changed, 840 insertions(+)
diff --git a/tools/convert-and-slurp-attachments.sh b/tools/convert-and-slurp-attachments.sh
new file mode 100755
index 0000000..ce7f34d
--- /dev/null
+++ b/tools/convert-and-slurp-attachments.sh
@@ -0,0 +1,18 @@
+#!/bin/sh -
+
+ls | fgrep -v . |
+while read page
+do
+ base="https://trac.rpki.net"
+ path="/wiki/$(echo $page | sed s=%2F=/=g)"
+
+ # Fetch the Wiki page, extract the useful portion of the HTML, convert that into Markdown
+ curl "${base}${path}" |
+ xsltproc --html extract-wiki-content.xsl - |
+ html2markdown --no-skip-internal-links --reference-links >"$page.md"
+
+ # Fetch a ZIP file containing any attachments, clean up if result is empty or broken
+ curl "${base}/zip-attachment${path}/" >"$page.zip"
+ zipinfo "$page.zip" >/dev/null 2>&1 || rm -f "$page.zip"
+
+done
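The loop above relies on the wiki dump's file naming convention: each page is a dot-free filename in which any "/" in the page path is escaped as "%2F", which the sed incantation undoes. A minimal Python 2 sketch of the same mapping (illustrative only; the page name below is hypothetical):

    # Map a dumped page filename back to its wiki URL, mirroring the
    # shell script's "sed s=%2F=/=g" substitution.
    import urllib

    page = "doc%2FRPKI"                     # hypothetical dump filename
    path = "/wiki/" + urllib.unquote(page)  # -> "/wiki/doc/RPKI"
    print("https://trac.rpki.net" + path)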
diff --git a/tools/extract-wiki-content.xsl b/tools/extract-wiki-content.xsl
new file mode 100644
index 0000000..e4376e8
--- /dev/null
+++ b/tools/extract-wiki-content.xsl
@@ -0,0 +1,177 @@
+<!--
+ - XSL transform to extract useful content of a Trac Wiki page.
+ -
+ - Django generates weird HTML for ordered lists: it sometimes breaks
+ - up a single ordered list into multiple adjacent <ol/> elements,
+ - using the @start attribute to try to make the result look like a
+ - single ordered list. This looks OK in Firefox but confuses the
+ - bejesus out of both html2markdown and htmldoc. In some cases this is
+ - probably unavoidable, but most of the uses of this I've seen look
+ - gratuitous, and are probably the result of code modularity issues
+ - in Django.
+ -
+ - So we try to clean this up, by merging adjacent <ol/> elements where
+ - we can. The merge incantation is an adaptation of:
+ -
+ - http://stackoverflow.com/questions/1806123/merging-adjacent-nodes-of-same-type-xslt-1-0
+ -
+ - There may be a more efficient way to do this, but I don't think
+ - we care, and this seems to work.
+ -
+ - Original author's explanation:
+ -
+ - The rather convoluted XPath expression for selecting the following
+ - sibling aaa nodes which are merged with the current one:
+ -
+ - following-sibling::aaa[ # following 'aaa' siblings
+ - not(preceding-sibling::*[ # if they are not preceded by
+ - not(self::aaa) and # a non-'aaa' node
+ - not(following-sibling::aaa = current()) # after the current node
+ - ])
+ - ]
+ -->
+
+ <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+ <xsl:output method="xml" encoding="us-ascii" omit-xml-declaration="yes" />
+
+ <xsl:param name="basename"/>
+ <xsl:param name="path"/>
+
+ <xsl:template match="/">
+ <xsl:message><xsl:value-of select="concat('Got path: ', $path)"/></xsl:message>
+ <xsl:variable name="id">
+ <xsl:call-template name="path-to-id">
+ <xsl:with-param name="p" select="$path"/>
+ </xsl:call-template>
+ </xsl:variable>
+ <xsl:message><xsl:value-of select="concat('Got id: ', $id)"/></xsl:message>
+ <xsl:comment>NEW PAGE</xsl:comment>
+ <html>
+ <body>
+ <div id="{$id}">
+ <xsl:apply-templates select="//div[@id = 'wikipage']/*"/>
+ </div>
+ </body>
+ </html>
+ </xsl:template>
+
+ <xsl:template match="//div[contains(@class, 'wiki-toc')]"/>
+
+ <xsl:template match="//span[@class = 'icon' and not(*)]"/>
+
+ <xsl:template match="a[contains(@class, 'wiki') and
+ starts-with(@href, '/wiki/')]">
+ <xsl:variable name="href">
+ <xsl:call-template name="path-to-id">
+ <xsl:with-param name="p" select="@href"/>
+ </xsl:call-template>
+ </xsl:variable>
+ <a href="#{$href}">
+ <xsl:apply-templates select="@*[name() != 'href']"/>
+ <xsl:apply-templates/>
+ </a>
+ </xsl:template>
+
+ <xsl:template match="a[starts-with(@href, '/attachment/wiki/')]">
+ <a href="{concat($basename, @href)}">
+ <xsl:apply-templates select="@*[name() != 'href']"/>
+ <xsl:apply-templates/>
+ </a>
+ </xsl:template>
+
+ <xsl:template match="img[starts-with(@src, '/raw-attachment/wiki/')]">
+ <img src="{concat($basename, @src)}">
+ <xsl:apply-templates select="@*[name() != 'src']"/>
+ <xsl:apply-templates/>
+ </img>
+ </xsl:template>
+
+ <xsl:template match="object[starts-with(@data, '/raw-attachment/wiki/') or
+ starts-with(@data, '/graphviz/')]">
+ <object data="{concat($basename, @data)}">
+ <xsl:apply-templates select="@*[name() != 'data']"/>
+ <xsl:apply-templates/>
+ </object>
+ </xsl:template>
+
+ <xsl:template match="embed[starts-with(@src, '/raw-attachment/wiki/') or
+ starts-with(@src, '/graphviz/')]">
+ <embed src="{concat($basename, @src)}">
+ <xsl:apply-templates select="@*[name() != 'src']"/>
+ <xsl:apply-templates/>
+ </embed>
+ </xsl:template>
+
+ <xsl:template match="text()[contains(., '')]">
+ <xsl:call-template name="remove-zero-width-spaces">
+ <xsl:with-param name="s" select="."/>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template match="@*|node()">
+ <xsl:copy>
+ <xsl:copy-of select="@*"/>
+ <xsl:apply-templates/>
+ </xsl:copy>
+ </xsl:template>
+
+ <xsl:template name="path-to-id">
+ <xsl:param name="p"/>
+ <xsl:text>_</xsl:text>
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="$p"/>
+ <xsl:with-param name="old">/</xsl:with-param>
+ <xsl:with-param name="new">.</xsl:with-param>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template name="remove-zero-width-spaces">
+ <xsl:param name="s"/>
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="$s"/>
+ <xsl:with-param name="old"></xsl:with-param>
+ <xsl:with-param name="new"/>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template name="replace">
+ <xsl:param name="s"/>
+ <xsl:param name="old"/>
+ <xsl:param name="new"/>
+ <xsl:choose>
+ <xsl:when test="contains($s, $old)">
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="concat(substring-before($s, $old),
+ $new,
+ substring-after($s, $old))"/>
+ <xsl:with-param name="old" select="$old"/>
+ <xsl:with-param name="new" select="$new"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="$s"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+
+ <xsl:template match="ol">
+ <xsl:if test="not(preceding-sibling::*[1]/self::ol)">
+ <xsl:variable name="following"
+ select="following-sibling::ol[
+ not(preceding-sibling::*[
+ not(self::ol) and
+ not(following-sibling::ol = current())
+ ])
+ ]"/>
+ <xsl:copy>
+ <xsl:apply-templates select="$following/@*[name() != 'start']"/>
+ <xsl:apply-templates select="@*"/>
+ <xsl:apply-templates select="node()"/>
+ <xsl:apply-templates select="$following/node()"/>
+ </xsl:copy>
+ </xsl:if>
+ </xsl:template>
+
+ </xsl:transform>
+
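A quick way to see the <ol/> merge in action is to run the stylesheet over a fabricated page. The following smoke test is not part of this commit; it assumes the stylesheet is saved as extract-wiki-content.xsl in the current directory and that python-lxml is installed:

    import lxml.etree

    xsl = lxml.etree.XSLT(lxml.etree.parse("extract-wiki-content.xsl"))
    doc = lxml.etree.HTML("<div id='wikipage'>"
                          "<ol><li>one</li></ol>"
                          "<ol start='2'><li>two</li></ol>"
                          "</div>")
    # Stylesheet string parameters need XPath quoting, hence "'...'".
    result = xsl(doc, basename="'.'", path="'/wiki/Example'")
    print(str(result))  # the two adjacent <ol> elements come out as one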
diff --git a/tools/rpki-wiki-to-markdown.py b/tools/rpki-wiki-to-markdown.py
new file mode 100644
index 0000000..dff87e6
--- /dev/null
+++ b/tools/rpki-wiki-to-markdown.py
@@ -0,0 +1,341 @@
+# Copyright (C) 2016 Parsons Government Services ("PARSONS")
+# Portions copyright (C) 2014 Dragon Research Labs ("DRL")
+# Portions copyright (C) 2012 Internet Systems Consortium ("ISC")
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notices and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND PARSONS, DRL, AND ISC DISCLAIM
+# ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+# PARSONS, DRL, OR ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+"""
+Trac Wiki -> Markdown converter, hacked from old Trac Wiki -> PDF/flat
+text converter.
+
+Pull HTML pages from a Trac Wiki, feed the useful bits to
+html2text to generate Markdown.
+
+Assumes you're using the TracNav plugin for the Wiki pages, and uses
+the same list as the TracNav plugin does to determine the set of pages
+to convert.
+"""
+
+# Dependencies, at least on Ubuntu Xenial:
+#
+# apt-get install python-lxml python-html2text
+#
+# Be warned that there are many unrelated packages named "html2text",
+# installed under various names on various platforms. This one
+# happens to be a useful HTML-to-Markdown converter.
+
+# Most of the work of massaging the HTML is done using XSL transforms,
+# because the template-driven style makes that easy. There's probably
+# some clever way to use lxml's XPath code to do the same thing in a
+# more pythonic way with ElementTrees, but I already had the XSL
+# transforms and there's a point of diminishing returns on this sort of
+# thing.
+
+import sys
+import os
+import argparse
+import lxml.etree
+import urllib
+import urlparse
+import subprocess
+import zipfile
+
+# Main program, up front so it doesn't get lost under all the XSL
+
+def main():
+
+    base = "https://trac.rpki.net"
+
+    parser = argparse.ArgumentParser(description = __doc__, formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("-b", "--base_url",
+                        default = base,
+                        help = "base URL for documentation web site")
+    parser.add_argument("-t", "--toc",
+                        default = base + "/wiki/doc/RPKI/TOC",
+                        help = "table of contents URL")
+    parser.add_argument("-d", "--directory",
+                        default = ".",
+                        help = "output directory")
+    parser.add_argument("-p", "--prefix",
+                        default = "/wiki/doc",
+                        help = "page name prefix on wiki")
+    args = parser.parse_args()
+
+    urls = str(xsl_get_toc(lxml.etree.parse(urllib.urlopen(args.toc)).getroot(),
+                           basename = repr(args.base_url))).splitlines()
+
+    assert all(urlparse.urlparse(url).path.startswith(args.prefix) for url in urls)
+
+    for pagenum, url in enumerate(urls):
+        path = urlparse.urlparse(url).path
+        page = xsl_get_page(lxml.etree.parse(urllib.urlopen(url)).getroot(),
+                            basename = repr(args.base_url),
+                            path = repr(path))
+
+        fn_base = os.path.join(args.directory, "{:02d}{}".format(pagenum, path[len(args.prefix):].replace("/", ".")))
+
+        fn = fn_base + ".zip"
+        zip_url = urlparse.urljoin(url, "/zip-attachment{}/".format(path))
+        urllib.urlretrieve(zip_url, fn)
+        with zipfile.ZipFile(fn, "r") as z:
+            if len(z.namelist()) == 0:
+                os.unlink(fn)
+            else:
+                sys.stderr.write("Wrote {}\n".format(fn))
+
+        for imgnum, img in enumerate(page.xpath("//img | //object | //embed")):
+            img_url = img.get("data" if img.tag == "object" else "src")
+            img_url = urlparse.urljoin(url, img_url)
+            fn = "{}.{:02d}{}".format(fn_base, imgnum, os.path.splitext(img_url)[1])
+            urllib.urlretrieve(img_url, fn)
+            sys.stderr.write("Wrote {}\n".format(fn))
+
+        html2markdown = subprocess.Popen(("html2markdown", "--no-skip-internal-links", "--reference-links"),
+                                         stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+        page.write(html2markdown.stdin)
+        html2markdown.stdin.close()
+        lines = html2markdown.stdout.readlines()
+        html2markdown.stdout.close()
+        html2markdown.wait()
+
+        while lines and lines[0].isspace():
+            del lines[0]
+
+        fn = fn_base + ".md"
+        with open(fn, "w") as f:
+            want_blank = False
+            for line in lines:
+                blank = line.isspace()
+                if want_blank and not blank:
+                    f.write("\n")
+                if not blank:
+                    f.write(line)
+                want_blank = blank
+        sys.stderr.write("Wrote {}\n".format(fn))
+
+        fn = fn[:-3] + ".wiki"
+        urllib.urlretrieve(url + "?format=txt", fn)
+        sys.stderr.write("Wrote {}\n".format(fn))
+
+
+# XSL transform to extract list of Wiki page URLs from the TOC Wiki page
+
+xsl_get_toc = lxml.etree.XSLT(lxml.etree.XML('''\
+ <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+ <xsl:output method="text" encoding="us-ascii"/>
+
+ <xsl:param name="basename"/>
+
+ <xsl:template match="/">
+ <xsl:for-each select="//div[@id = 'wikipage']/ul//a">
+ <xsl:value-of select="concat($basename, @href, '
')"/>
+ </xsl:for-each>
+ </xsl:template>
+
+ </xsl:transform>
+'''))
+
+# XSL transform to extract useful content of a Wiki page.
+
+# Django generates weird HTML for ordered lists: it sometimes breaks
+# up a single ordered list into multiple adjacent <ol/> elements,
+# using the @start attribute to try to make the result look like a
+# single ordered list. This looks OK in Firefox but confuses the
+# bejesus out of both html2markdown and htmldoc. In some cases this is
+# probably unavoidable, but most of the uses of this I've seen look
+# gratuitous, and are probably the result of code modularity issues
+# in Django.
+#
+# So we try to clean this up, by merging adjacent <ol/> elements where
+# we can. The merge incantation is an adaptation of:
+#
+# http://stackoverflow.com/questions/1806123/merging-adjacent-nodes-of-same-type-xslt-1-0
+#
+# There may be a more efficient way to do this, but I don't think
+# we care, and this seems to work.
+#
+# Original author's explanation:
+#
+# The rather convoluted XPath expression for selecting the following
+# sibling aaa nodes which are merged with the current one:
+#
+# following-sibling::aaa[ # following 'aaa' siblings
+# not(preceding-sibling::*[ # if they are not preceded by
+# not(self::aaa) and # a non-'aaa' node
+# not(following-sibling::aaa = current()) # after the current node
+# ])
+# ]
+
+xsl_get_page = lxml.etree.XSLT(lxml.etree.XML('''\
+ <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+ <xsl:output method="xml" encoding="us-ascii" omit-xml-declaration="yes" />
+
+ <xsl:param name="basename"/>
+ <xsl:param name="path"/>
+
+ <xsl:template match="/">
+ <xsl:message><xsl:value-of select="concat('Got path: ', $path)"/></xsl:message>
+ <xsl:variable name="id">
+ <xsl:call-template name="path-to-id">
+ <xsl:with-param name="p" select="$path"/>
+ </xsl:call-template>
+ </xsl:variable>
+ <xsl:message><xsl:value-of select="concat('Got id: ', $id)"/></xsl:message>
+ <xsl:comment>NEW PAGE</xsl:comment>
+ <html>
+ <body>
+ <div id="{$id}">
+ <xsl:apply-templates select="//div[@id = 'wikipage']/*"/>
+ </div>
+ </body>
+ </html>
+ </xsl:template>
+
+ <xsl:template match="//div[contains(@class, 'wiki-toc')]"/>
+
+ <xsl:template match="//span[@class = 'icon' and not(*)]"/>
+
+ <xsl:template match="a[contains(@class, 'wiki') and
+ starts-with(@href, '/wiki/')]">
+ <xsl:variable name="href">
+ <xsl:call-template name="path-to-id">
+ <xsl:with-param name="p" select="@href"/>
+ </xsl:call-template>
+ </xsl:variable>
+ <a href="#{$href}">
+ <xsl:apply-templates select="@*[name() != 'href']"/>
+ <xsl:apply-templates/>
+ </a>
+ </xsl:template>
+
+ <xsl:template match="a[starts-with(@href, '/attachment/wiki/')]">
+ <a href="{concat($basename, @href)}">
+ <xsl:apply-templates select="@*[name() != 'href']"/>
+ <xsl:apply-templates/>
+ </a>
+ </xsl:template>
+
+ <xsl:template match="img[starts-with(@src, '/raw-attachment/wiki/')]">
+ <img src="{concat($basename, @src)}">
+ <xsl:apply-templates select="@*[name() != 'src']"/>
+ <xsl:apply-templates/>
+ </img>
+ </xsl:template>
+
+ <xsl:template match="object[starts-with(@data, '/raw-attachment/wiki/') or
+ starts-with(@data, '/graphviz/')]">
+ <object data="{concat($basename, @data)}">
+ <xsl:apply-templates select="@*[name() != 'data']"/>
+ <xsl:apply-templates/>
+ </object>
+ </xsl:template>
+
+ <xsl:template match="embed[starts-with(@src, '/raw-attachment/wiki/') or
+ starts-with(@src, '/graphviz/')]">
+ <embed src="{concat($basename, @src)}">
+ <xsl:apply-templates select="@*[name() != 'src']"/>
+ <xsl:apply-templates/>
+ </embed>
+ </xsl:template>
+
+ <xsl:template match="text()[contains(., '')]">
+ <xsl:call-template name="remove-zero-width-spaces">
+ <xsl:with-param name="s" select="."/>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template match="@*|node()">
+ <xsl:copy>
+ <xsl:copy-of select="@*"/>
+ <xsl:apply-templates/>
+ </xsl:copy>
+ </xsl:template>
+
+ <xsl:template name="path-to-id">
+ <xsl:param name="p"/>
+ <xsl:text>_</xsl:text>
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="$p"/>
+ <xsl:with-param name="old">/</xsl:with-param>
+ <xsl:with-param name="new">.</xsl:with-param>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template name="remove-zero-width-spaces">
+ <xsl:param name="s"/>
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="$s"/>
+ <xsl:with-param name="old"></xsl:with-param>
+ <xsl:with-param name="new"/>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template name="replace">
+ <xsl:param name="s"/>
+ <xsl:param name="old"/>
+ <xsl:param name="new"/>
+ <xsl:choose>
+ <xsl:when test="contains($s, $old)">
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="concat(substring-before($s, $old),
+ $new,
+ substring-after($s, $old))"/>
+ <xsl:with-param name="old" select="$old"/>
+ <xsl:with-param name="new" select="$new"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="$s"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+
+ <xsl:template match="ol">
+ <xsl:if test="not(preceding-sibling::*[1]/self::ol)">
+ <xsl:variable name="following"
+ select="following-sibling::ol[
+ not(preceding-sibling::*[
+ not(self::ol) and
+ not(following-sibling::ol = current())
+ ])
+ ]"/>
+ <xsl:copy>
+ <xsl:apply-templates select="$following/@*[name() != 'start']"/>
+ <xsl:apply-templates select="@*"/>
+ <xsl:apply-templates select="node()"/>
+ <xsl:apply-templates select="$following/node()"/>
+ </xsl:copy>
+ </xsl:if>
+ </xsl:template>
+
+ </xsl:transform>
+'''))
+
+# All the files we want to parse are HTML, so make HTML the default
+# parser. In theory the HTML produced by Trac is XHTML thus should
+# parse correctly (in fact, better) as XML, but in practice this seems
+# not to work properly at the moment, while parsing as HTML does.
+# Haven't bothered to figure out why, life is too short.
+#
+# If you're reading this comment because this script stopped working
+# after a Trac upgrade, try commenting out this line to see whether
+# things have changed and Trac's HTML now parses better as XML.
+
+lxml.etree.set_default_parser(lxml.etree.HTMLParser())
+
+# Run the main program.
+main()
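As the comment near the top of this file suggests, the <ol/> cleanup could also be done directly on the parsed tree instead of in XSLT. A rough ElementTree-flavored sketch of that alternative (not part of this commit, and cruder than the XSLT version: it only merges immediately adjacent <ol/> siblings):

    import lxml.etree

    def merge_adjacent_ols(root):
        # Fold each <ol> into an immediately preceding <ol> sibling;
        # the merged element's @start attribute disappears with it.
        for ol in list(root.iter("ol")):
            prev = ol.getprevious()
            if prev is not None and prev.tag == "ol":
                for li in list(ol):
                    prev.append(li)
                ol.getparent().remove(ol)

    html = lxml.etree.HTML(
        "<div><ol><li>a</li></ol><ol start='2'><li>b</li></ol></div>")
    merge_adjacent_ols(html)
    print(lxml.etree.tostring(html))  # one <ol> containing both <li>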
diff --git a/tools/trac-wiki-to-markdown.rb b/tools/trac-wiki-to-markdown.rb
new file mode 100644
index 0000000..f7d41ae
--- /dev/null
+++ b/tools/trac-wiki-to-markdown.rb
@@ -0,0 +1,51 @@
+# Untested code snippet from https://gist.github.com/somebox/619537
+
+class String
+  def trac_to_markdown!
+    gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`')
+    gsub!(/\{\{\{(.+?)\}\}\}/m){|m| m.each_line.map{|x| "\t#{x}".gsub(/[\{\}]{3}/,'')}.join}
+    gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1')
+    gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1')
+    gsub!(/\=\=\s(.+?)\s\=\=/, '# \1')
+    gsub!(/\=\s(.+?)\s\=[\s\n]*/, '')
+    gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)')
+    gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1')
+    gsub!(/'''(.+)'''/, '*\1*')
+    gsub!(/''(.+)''/, '_\1_')
+    gsub!(/^\s\*/, '*')
+    gsub!(/^\s\d\./, '1.')
+
+    gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`')
+    gsub!(/'''(.+?)'''/, '**\1**')
+    gsub!(/''(.+?)''/, '*\1*')
+    gsub!(/((^\|\|[^\n\r]+\|\|[ \t]*\r?(\n|$))+)/m) do |m|
+      m = m.each_line.map do |x|
+        x.gsub(/\t/, ' ')
+         .gsub(/(\|\|){2,}/){|k| k.gsub(/\|\|/, '|| ')}
+         .gsub(/ {3,}/, ' ')
+      end.join
+      lines = m.each_line.to_a
+      line1 = lines.shift
+      line2 = line1.dup.gsub(/[^\n\r\|]/, '-')
+      lines.unshift(line1, line2)
+      c = lines.join
+      c = c.each_line.map do |x|
+        x.gsub(/\=\s?(.+?)\s?=/, ' \1 ')
+         .gsub(/\|\|/, '|')
+      end.join
+    end
+    gsub!(/^\{\{\{(.+?)^\}\}\}/m, '```\1```')
+    gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1')
+    gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1')
+    gsub!(/\=\=\s(.+?)\s\=\=/, '# \1')
+    gsub!(/\=\s(.+?)\s\=[\s\n]*/, '')
+    gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)')
+    gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1')
+    gsub!(/^\s\*/, '*')
+    gsub!(/^\s\d\./, '1.')
+  end
+end
+
+some_trac = 'my document'
+
+puts some_trac.trac_to_markdown!
diff --git a/tools/trac2down.py b/tools/trac2down.py
new file mode 100644
index 0000000..5bb9094
--- /dev/null
+++ b/tools/trac2down.py
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+
+# Untested code from https://gist.githubusercontent.com/sgk/1286682/raw/b744dd2e47a68d60373ad39df87cfe8256f517af/trac2down.py
+
+# vim:set fileencoding=utf-8 sw=2 ai:
+
+import sqlite3
+import datetime
+import re
+
+SQL = '''
+ select
+ name, version, time, author, text
+ from
+ wiki w
+ where
+ version = (select max(version) from wiki where name = w.name)
+'''
+
+conn = sqlite3.connect('../trac.db')
+result = conn.execute(SQL)
+for row in result:
+  name = row[0]
+  version = row[1]
+  time = row[2]
+  author = row[3]
+  text = row[4]
+
+  text = re.sub('\r\n', '\n', text)
+  text = re.sub(r'{{{(.*?)}}}', r'`\1`', text)
+  def indent4(m):
+    return '\n    ' + m.group(1).replace('\n', '\n    ')
+  text = re.sub(r'(?sm){{{\n(.*?)\n}}}', indent4, text)
+  text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'#### \1', text)
+  text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'### \1', text)
+  text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'## \1', text)
+  text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'# \1', text)
+  text = re.sub(r'(?m)^       \* ', r'****', text)
+  text = re.sub(r'(?m)^     \* ', r'***', text)
+  text = re.sub(r'(?m)^   \* ', r'**', text)
+  text = re.sub(r'(?m)^ \* ', r'*', text)
+  text = re.sub(r'(?m)^ \d+\. ', r'1.', text)
+
+  a = []
+  for line in text.split('\n'):
+    if not line.startswith('    '):
+      line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
+      line = re.sub(r'\[(wiki:[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](/\1/)', line)
+      line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
+      line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line)
+      line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
+    a.append(line)
+  text = '\n'.join(a)
+
+  fp = file('%s.md' % name, 'w')
+  print >>fp, '<!-- Name: %s -->' % name
+  print >>fp, '<!-- Version: %d -->' % version
+  print >>fp, '<!-- Last-Modified: %s -->' % datetime.datetime.fromtimestamp(time).strftime('%Y/%m/%d %H:%M:%S')
+  print >>fp, '<!-- Author: %s -->' % author
+  fp.write(text.encode('utf-8'))
+  fp.close()
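The indent4() helper above is what turns a multi-line {{{ }}} block into a four-space-indented Markdown code block. A standalone check with made-up input (this commit's script reads from a Trac SQLite database instead):

    import re

    def indent4(m):
        return '\n    ' + m.group(1).replace('\n', '\n    ')

    text = "before\n{{{\nline one\nline two\n}}}\nafter"
    print(re.sub(r'(?sm){{{\n(.*?)\n}}}', indent4, text))
    # before
    #
    #     line one
    #     line two
    # after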
diff --git a/tools/trac2md.py b/tools/trac2md.py
new file mode 100644
index 0000000..40c09d4
--- /dev/null
+++ b/tools/trac2md.py
@@ -0,0 +1,192 @@
+#!/usr/bin/python
+
+# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/
+
+# This code mostly taken from patches to pagure_importer by mreynolds
+
+import sys
+import re
+import time
+import requests
+import shutil
+import os
+from base64 import b64decode
+from datetime import datetime
+
+wikilink_pattern = re.compile('\[http(.*)\]')
+wikilink_extract = re.compile('\[(.*)\]')
+wikiheading1_pattern = re.compile('^= (.*) =$')
+wikiheading2_pattern = re.compile('^== (.*) ==$')
+wikiheading3_pattern = re.compile('^=== (.*) ===$')
+strikethrough_pattern = re.compile('~~(.*)~~')
+
+def to_timestamp(tm):
+    ''' Convert to timestamp which can be jsonified '''
+
+    tm = tm.replace('+00:00', '')
+    date = datetime.strptime(tm, '%Y-%m-%dT%H:%M:%S')
+    ts = str(time.mktime(date.timetuple()))[:-2] # Strip the .0
+    return ts
+
+
+def strip_wikilink(content):
+    ''' Need to remove wiki link format from custom fields. They come in a
+    variety of forms that can be comma or whitespace separated. They can also
+    include link names which must also be removed.
+
+    [https://bugzilla.redhat.com/show_bug.cgi?id=772777]
+    [https://bugzilla.com/123456789], [http://bugzilla.com/7777777 7777777]
+    [https://bugzilla.com/6666666 6666666]
+    '''
+
+    links = []
+    if wikilink_pattern.search(content):
+        # Looks like we have a link in here
+        links = []
+        mylist = re.findall(r'\[([^]]*)\]', content)
+        for i in mylist:
+            links.append(i.split(' ', 1)[0])
+        return ', '.join(links)
+    else:
+        return content
+
+
+def convert_headers(line):
+    ''' Convert wikiformat headers
+    '''
+    level_count = 1
+    for header in [wikiheading1_pattern,
+                   wikiheading2_pattern,
+                   wikiheading3_pattern]:
+        try:
+            level = header.search(line).group(1)
+            if level:
+                line = "%s %s" % ('#' * level_count, level)
+                break # No need to check other heading levels
+        except:
+            # Try the next heading level
+            pass
+        level_count += 1
+
+    return line
+
+
+def convert_wikilinks(line):
+    ''' Convert wikiformat links
+    '''
+    if wikilink_pattern.search(line):
+        try:
+            result = wikilink_extract.search(line).group(1)
+            if result:
+                parts = result.split(' ', 1)
+                if len(parts) == 1:
+                    mdlink = '[%s](%s)' % (parts[0], parts[0])
+                elif len(parts) == 2:
+                    mdlink = '[%s](%s)' % (parts[1], parts[0])
+                line = line.replace('[' + result + ']', mdlink)
+        except:
+            # Not a link, not a problem
+            pass
+
+    return line
+
+
+def convert_strike(line):
+    ''' Convert wikiformat strikethrough text
+    '''
+    striked_result = strikethrough_pattern.search(line)
+    if striked_result:
+        try:
+            striked_text = striked_result.group(1)
+            if striked_text:
+                orig_text = '~~%s~~' % striked_text
+                new_text = '<s>%s</s>' % striked_text
+                line = line.replace(orig_text, new_text)
+        except:
+            # Not struck through
+            pass
+    return line
+
+def WikiToMD(content):
+    ''' Convert wiki/RST format to Markdown. Code blocks, bold/italics,
+    wiki links, lists, strikethrough text, and headers. '''
+
+    code_block = False
+    in_list = False
+    nested_level = 0
+    prev_indent = 0
+    new_content = ""
+
+    for line in content.split('\n'):
+        line = line.replace("\r", "")
+        if "{{{" in line:
+            code_block = True
+            line = line.replace("{{{", "```")
+        if "}}}" in line:
+            code_block = False
+            line = line.replace("}}}", "```")
+        if not code_block:
+            #
+            # Convert bullet lists. The start and end of a list needs
+            # an empty line. wikiformat uses both '*' and '-' for its
+            # lists; this conversion normalizes them all to '-'.
+            #
+            if line.startswith('* '):
+                if not in_list:
+                    new_content = "%s\n" % (new_content)
+                    in_list = True
+                line = line[1:]
+                line = '-%s' % (line)
+            elif line.startswith('- '):
+                # No need to modify the line, just add the new line
+                if not in_list:
+                    new_content = "%s\n" % (new_content)
+                    in_list = True
+            elif line.startswith(' '):
+                # Check for nested lists
+                nested_line = line.lstrip(' ')
+                if nested_line.startswith('* ') or \
+                   nested_line.startswith('- '):
+                    # Adjust the nested list level as needed
+                    indent = len(line) - len(nested_line)
+                    if indent > prev_indent:
+                        nested_level += 1
+                    elif indent < prev_indent:
+                        nested_level -= 1
+                    prev_indent = indent
+
+                    # Set the proper indentation for markdown
+                    line = ('%s-%s' % (' ' * nested_level,
+                                       nested_line[1:]))
+            else:
+                if in_list:
+                    # Add the closing empty line
+                    new_content = "%s\n" % (new_content)
+                    in_list = False
+                nested_level = 0
+                prev_indent = 0
+
+            # Convert headers
+            line = convert_headers(line)
+
+            # Convert wiki links
+            line = convert_wikilinks(line)
+
+            # Convert strikethrough text
+            line = convert_strike(line)
+
+            # Convert bold and italic text (do this last)
+            line = line.replace("'''", "**") # Convert bold text
+            line = line.replace("''", "*")   # Convert italic text
+
+        new_content = "%s%s\n" % (new_content, line)
+
+    return new_content
+
+for f in sys.argv[1:]:
+    d = WikiToMD(open(f, "r").read())
+    newf = f.replace(".trac", ".md")
+    with open(newf, "w") as fp:
+        fp.write(d)
+        pass
+    pass
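WikiToMD() can also be exercised on a string directly, which makes the conversion rules easy to spot-check. A hypothetical usage example, assuming trac2md.py is importable and the interpreter was started without file arguments (importing runs the argv loop above):

    from trac2md import WikiToMD

    sample = "== Title ==\n* one\n* two\nplain '''bold''' ''italic''"
    print(WikiToMD(sample))
    # ## Title
    #
    # - one
    # - two
    #
    # plain **bold** *italic*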