// This script is provided by melix.
// The source can be found at https://gist.github.com/melix/6020336
//
// Purpose: walk the 'html' directory, mirror its directory layout under
// 'asciidoc', and convert every '*.html' file to AsciiDoc by
//   1. cleaning the HTML with HtmlCleaner (dropping 'class' attributes and
//      flattening table cells to plain text), then
//   2. feeding the cleaned HTML to the external 'pandoc' executable.
// Requires 'pandoc' to be available on the PATH.
@Grab('net.sourceforge.htmlcleaner:htmlcleaner:2.4')
import org.htmlcleaner.*

def src = new File('html').toPath()
def dst = new File('asciidoc').toPath()

def cleaner = new HtmlCleaner()
def props = cleaner.properties
props.translateSpecialEntities = false
def serializer = new SimpleHtmlSerializer(props)

// eachFileRecurse never visits the root itself, so the destination root has to
// be created explicitly; otherwise nothing below it can be written.
dst.toFile().mkdirs()

src.toFile().eachFileRecurse { f ->
    def relative = src.relativize(f.toPath())
    def target = dst.resolve(relative)
    if (f.isDirectory()) {
        // mkdirs (not mkdir) so that a missing parent does not make this a silent no-op.
        target.toFile().mkdirs()
    } else if (f.name.endsWith('.html')) {
        def tmpHtml = File.createTempFile('clean', '.html')
        try {
            println "Converting $relative"
            def result = cleaner.clean(f)
            result.traverse({ tagNode, htmlNode ->
                // tagNode is accessed null-safely throughout: the visitor may be
                // invoked with no tag node for some nodes.
                tagNode?.attributes?.remove 'class'
                if ('td' == tagNode?.name || 'th' == tagNode?.name) {
                    // Header cells are turned into ordinary cells, and the cell's
                    // markup is replaced by its text content only.
                    tagNode.name = 'td'
                    String txt = tagNode.text
                    tagNode.removeAllChildren()
                    tagNode.insertChild(0, new ContentNode(txt))
                }
                true // keep traversing
            } as TagNodeVisitor)

            serializer.writeToFile(result, tmpHtml.absolutePath, "utf-8")

            // Pass the command as a list so that paths containing whitespace are
            // not split into several arguments (String.execute() tokenises on spaces).
            // The output keeps the original naming scheme: '<name>.html.adoc'.
            // NOTE(review): '-R', '-S' and '--normalize' look like pandoc 1.x
            // options — confirm they are accepted by the installed pandoc version.
            def command = ['pandoc', '-f', 'html', '-t', 'asciidoc',
                           '-R', '-S', '--normalize', '-s',
                           tmpHtml.absolutePath, '-o', "${target}.adoc".toString()]
            def process = command.execute()
            // Drain stdout/stderr while waiting; a plain waitFor() can block
            // forever once the child fills its output pipe.
            process.waitForProcessOutput(System.out, System.err)
            if (process.exitValue() != 0) {
                // Report, but keep going: one bad file should not abort the batch.
                System.err.println "pandoc exited with code ${process.exitValue()} for $relative"
            }
        } finally {
            // Always remove the temporary file, even when cleaning or pandoc fails.
            tmpHtml.delete()
        }
    }
    /* else {
        "cp html/$relative $target".execute()
    }*/
}