// Path: root/docs/modules/migrate/examples/convert.groovy
// Blob: d1f43ac70b7068d7d618a8ae3ca048062cb17a4a
// This script is provided by melix.
// The source can be found at https://gist.github.com/melix/6020336

@Grab('net.sourceforge.htmlcleaner:htmlcleaner:2.4')
import org.htmlcleaner.*

// Input tree holding the HTML files and output tree for the generated
// AsciiDoc; both are relative paths, resolved against the working directory.
def src = java.nio.file.Paths.get('html')
def dst = java.nio.file.Paths.get('asciidoc')

// The cleaner and the serializer share one properties object, configured so
// that special entities are not translated.
def cleaner = new HtmlCleaner()
def cleanerProps = cleaner.getProperties()
cleanerProps.setTranslateSpecialEntities(false)
def serializer = new SimpleHtmlSerializer(cleanerProps)

// Walks every entry below `src` and mirrors it into `dst`:
//   * directories are recreated under `dst`;
//   * each *.html file is cleaned with HtmlCleaner, written to a temporary
//     file, and converted by an external `pandoc` process into
//     "<target>.adoc" (the original ".html" part of the name is kept);
//   * any other file is skipped (it is not copied to `dst`).
src.toFile().eachFileRecurse { f ->
    def relative = src.relativize(f.toPath())
    def target = dst.resolve(relative)
    if (f.isDirectory()) {
        // mkdirs rather than mkdir: the `dst` root itself may not exist yet.
        target.toFile().mkdirs()
    } else if (f.name.endsWith('.html')) {
        // Guarantee the output directory exists before pandoc writes into it,
        // independent of the order in which entries are visited.
        target.parent.toFile().mkdirs()

        def tmpHtml = File.createTempFile('clean', '.html')
        try {
            println "Converting $relative"
            def result = cleaner.clean(f)
            // NOTE(review): HtmlCleaner's TagNodeVisitor.visit appears to be
            // declared as (parentNode, htmlNode); if so, `tagNode` below is the
            // parent of the node being visited (null for the root), which would
            // explain the null-safe access. Confirm before changing this logic.
            result.traverse({ tagNode, htmlNode ->
                tagNode?.attributes?.remove 'class'
                if ('td' == tagNode?.name || 'th' == tagNode?.name) {
                    // Turn header cells into plain cells and flatten the cell
                    // content to its text only.
                    tagNode.name = 'td'
                    String txt = tagNode.text
                    tagNode.removeAllChildren()
                    tagNode.insertChild(0, new ContentNode(txt))
                }

                // Returning true keeps the traversal going.
                true
            } as TagNodeVisitor)
            serializer.writeToFile(
                    result, tmpHtml.absolutePath, "utf-8"
            )

            // Pass the command as an argument list so that paths containing
            // whitespace are not split into several arguments.
            def pandoc = [
                    'pandoc', '-f', 'html-native_divs', '-t', 'asciidoctor',
                    tmpHtml.absolutePath, '--wrap=none',
                    '-o', "${target}.adoc".toString()
            ].execute()
            // Drain stdout/stderr while waiting so the child process cannot
            // block on a full pipe; its output is forwarded to this script's.
            pandoc.waitForProcessOutput(System.out, System.err)
            if (pandoc.exitValue() != 0) {
                // Report and carry on: one failed file should not abort the run.
                System.err.println "pandoc failed (exit code ${pandoc.exitValue()}) for $relative"
            }
        } finally {
            // Always remove the temporary file, even when conversion fails.
            tmpHtml.delete()
        }
    }
}