Tidy HTML before trying to convert it with abiword

This commit is contained in:
Simon Gaeremynck 2015-05-18 16:24:41 +01:00
parent 9e9207d8b6
commit 786b43efc8
4 changed files with 87 additions and 35 deletions

View file

@ -90,6 +90,10 @@
Abiword is needed for advanced import/export features of pads*/
"abiword" : null,
/* This is the path to the Tidy executable. Setting it to null disables Tidy.
Tidy is used to improve the quality of exported pads*/
"tidyHtml" : null,
/* Allow import of file types other than the supported types: txt, doc, docx, rtf, odt, html & htm */
"allowUnknownFileEnds" : true,

View file

@ -28,6 +28,7 @@ var fs = require("fs");
var settings = require('../utils/Settings');
var os = require('os');
var hooks = require("ep_etherpad-lite/static/js/pluginfw/hooks");
var TidyHtml = require('../utils/TidyHtml');
//load abiword only if its enabled
if(settings.abiword != null)
@ -172,12 +173,19 @@ exports.doExport = function(req, res, padId, type)
fs.writeFile(srcFile, html, callback);
}
},
//send the convert job to abiword
// Tidy up the exported HTML
function(callback)
{
//ensure html can be collected by the garbage collector
html = null;
TidyHtml.tidy(srcFile, callback);
},
//send the convert job to abiword
function(callback)
{
destFile = tempDirectory + "/etherpad_export_" + randNum + "." + type;
abiword.convertFile(srcFile, destFile, type, callback);
},

View file

@ -152,6 +152,11 @@ exports.minify = true;
*/
exports.abiword = null;
/**
* The path of the tidy executable
*/
exports.tidyHtml = null;
/**
* Should we support non-natively supported file types on import?
*/

View file

@ -0,0 +1,35 @@
/**
* Tidy up the HTML in a given file
*/
var settings = require("./Settings");
var spawn = require('child_process').spawn;
exports.tidy = function(srcFile, callback) {
// Don't do anything if Tidy hasn't been enabled
if (!settings.tidyHtml) {
return callback(null);
}
var errMessage = '';
// Spawn a new tidy instance that cleans up the file inline
var tidy = spawn(settings.tidyHtml, ['-modify', srcFile]);
// Keep track of any error messages
tidy.stderr.on('data', function (data) {
errMessage += data.toString();
});
// Wait until Tidy is done
tidy.on('close', function(code) {
// Tidy returns a 0 when no errors occur and a 1 exit code when
// the file could be tidied but a few warnings were generated
if (code === 0 || code === 1) {
return callback(null);
} else {
console.error(errMessage);
return callback('Tidy died with exit code ' + code);
}
});
};