format.js #1

/*
	Copyright (c) 2004-2011, The Dojo Foundation All Rights Reserved.
	Available via Academic Free License >= 2.1 OR the modified BSD license.
	see: http://dojotoolkit.org/license for details
*/


if(!dojo._hasResource["dojox.html.format"]){ //_hasResource checks added by build. Do not use _hasResource directly in your code.
dojo._hasResource["dojox.html.format"] = true;
dojo.provide("dojox.html.format");
dojo.require("dojox.html.entities");


dojox.html.format.prettyPrint = function(html/*String*/, indentBy /*Integer?*/, maxLineLength /*Integer?*/, map/*Array?*/, /*boolean*/ xhtml){
	// summary:
	//		Function for providing a 'pretty print' version of HTML content from
	//		the provided string.  It's not perfect by any means, but it does
	//		a 'reasonable job'.
	// html: String
	//		The string of HTML to try and generate a 'pretty' formatting.
	// indentBy:  Integer
	//		Optional input for the number of spaces to use when indenting.
	//		If not defined, zero, negative, or 10 or greater, will just use tab
	//		as the indent.
	// maxLineLength: Integer
	//		Optional input for the number of characters a text line should use in
	//		the document, including the indent if possible.
	// map:	Array
	//		Optional array of entity mapping characters to use when processing the
	//		HTML Text content.  By default it uses the default set used by the
	//		dojox.html.entities.encode function.
	// xhtml: boolean
	//		Optional parameter that declares that the returned HTML should try to be 'xhtml' compatible.
	//		This means normally unclosed tags are terminated with /> instead of >.  Example: <hr> -> <hr />
	// Shared state for the helper closures below.

	// Output fragments; joined into the final string when processing is done.
	var content = [];
	// Current nesting level; indent() emits this many copies of iTxt.
	var indentDepth = 0;
	// Stack kept in step with the open elements: openTag pushes the tag name
	// when an end tag is needed, or false when it is not; closeTag pops it.
	var closeTags = [];
	// The text of a single indent level (tab unless overridden below).
	var iTxt = "\t";
	// Encoded text and inline-element markup collected so far but not yet
	// flushed to 'content' through formatText.
	var textContent = "";
	// Stack of booleans, one per open element, recording whether the element
	// was treated as an inline formatting tag.
	var inlineStyle = [];
	var i;

	// Compile regexps once for this call.
	// '=' followed by an unquoted value that ends at whitespace or '>';
	// used by openTag to put quotes around the value.
	var rgxp_fixIEAttrs = /[=]([^"']+?)(\s|>)/g;
	// A style attribute with a double-quoted, single-quoted or bare value.
	var rgxp_styleMatch = /style=("[^"]*"|'[^']*'|\S*)/gi;
	// Any name=value attribute with a double-quoted, single-quoted or bare value.
	var rgxp_attrsMatch = /[\w-]+=("[^"]*"|'[^']*'|\S*)/gi;

	// Check to see if we want to use spaces for indent instead
	// of tab.  Only values from 1 up to (but not including) 10 switch to
	// spaces; anything else keeps the tab.
	if(indentBy && indentBy > 0 && indentBy < 10){
		iTxt = "";
		for(i = 0; i < indentBy; i++){
			iTxt += " ";
		}
	}

	//Build the content outside of the editor so we can walk
	//via DOM and build a 'pretty' output.
	var contentDiv = dojo.doc.createElement("div");
	contentDiv.innerHTML = html;

	// Use the entity encode/decode functions, they cache on the map,
	// so it won't multiprocess a map.
	var encode = dojox.html.entities.encode;
	var decode = dojox.html.entities.decode;

	/** Define a bunch of formatters to format the output. **/
	var isInlineFormat = (function(){
		// Lower-case names of the inline formatting elements.  Kept in a
		// closure so the table is built once.
		var inlineTags = {
			"a": true,
			"b": true,
			"strong": true,
			"s": true,
			"strike": true,
			"i": true,
			"u": true,
			"em": true,
			"sup": true,
			"sub": true,
			"span": true,
			"font": true,
			"big": true,
			"cite": true,
			"q": true,
			"small": true
		};
		return function(tag){
			// summary:
			//		Reports whether the tag is an inline formatting element.
			//		Such elements are kept in the running text instead of
			//		being put on their own indented line, since breaking
			//		around them can change how the text renders.
			// tag:
			//		The (lower-case) tag name to examine.
			// returns:
			//		true for a known inline formatting tag, false otherwise.

			// The '=== true' test keeps inherited object properties
			// (e.g. "constructor") from being reported as inline tags.
			return typeof tag === "string" && inlineTags[tag] === true;
		};
	})();

	//Create less divs.
	var div = contentDiv.ownerDocument.createElement("div");
	var outerHTML = function(node){
		// summary:
		//		Returns the markup of a node without any of its children.
		// description:
		//		A shallow clone of the node is placed in the shared scratch
		//		'div' and the div's innerHTML is read back.  The shallow
		//		clone is what keeps child nodes out of the result.
		// node:
		//		The node to serialize.
		var shallowCopy = node.cloneNode(false);
		div.appendChild(shallowCopy);
		var markup = div.innerHTML;
		// Empty the scratch div so it can be reused by the next call.
		div.innerHTML = "";
		return markup;
	};

	var sizeIndent = function(){
		// summary:
		//		Returns the number of characters the indent occupies at the
		//		current depth, i.e. the length of iTxt repeated indentDepth
		//		times.
		var width = 0;
		var level = indentDepth;
		while(level > 0){
			width += iTxt.length;
			level--;
		}
		return width;
	};

	var indent = function(){
		// summary:
		//		Appends the indent for the current depth to the output:
		//		one copy of iTxt per level of indentDepth.
		var remaining = indentDepth;
		while(remaining > 0){
			content.push(iTxt);
			remaining--;
		}
	};
	var newline = function(){
		// summary:
		//		Appends a single line break to the output.
		content[content.length] = "\n";
	};

	var processTextNode = function(n){
		// summary:
		//		Entity-encodes the value of a text node and adds it to the
		//		pending text.  Nothing is written to the output here; the
		//		pending text is flushed later through formatText.
		// n:
		//		The text node to process.
		var encoded = encode(n.nodeValue, map);
		textContent = textContent + encoded;
	};

	var formatText = function(txt){
		// summary:
		//		Formats the text collected up to this point so it can be
		//		inserted into the output.
		// description:
		//		Every source line is trimmed and the lines are joined with
		//		single spaces.  The result is then written at the current
		//		indent, either as one line or, when maxLineLength is a
		//		positive number, wrapped at spaces so that each line
		//		including its indent fits in maxLineLength where possible.
		//		A word with no space to break at is kept whole, even if that
		//		makes the line too long.
		// txt:
		//		The text to format.
		// returns:
		//		The formatted text, each line ending in "\n", or "" when the
		//		text contains nothing but whitespace.
		var i;

		// Clean up any existing indentation/line organization since it is
		// going to be reworked anyway.
		var rawLines = txt.split("\n");
		for(i = 0; i < rawLines.length; i++){
			rawLines[i] = dojo.trim(rawLines[i]);
		}
		txt = dojo.trim(rawLines.join(" "));
		if(txt === ""){
			return "";
		}

		// The indent is the same for every line emitted here, so build it once.
		var prefix = "";
		for(i = 0; i < indentDepth; i++){
			prefix += iTxt;
		}

		if(!(maxLineLength && maxLineLength > 0)){
			// No wrapping requested.
			return prefix + txt + "\n";
		}

		// Room left for text once the indent is accounted for.  When the
		// indent alone uses up the whole budget, fall back to the full length.
		var indentSize = sizeIndent();
		var maxLine = maxLineLength;
		if(maxLineLength > indentSize){
			maxLine -= indentSize;
		}

		var lines = [];
		while(txt){
			// Compare against the room left after the indent (maxLine), not
			// the raw maxLineLength; otherwise text that only fits without
			// its indent is emitted unsplit and overflows the limit.
			if(txt.length > maxLine){
				// Look backwards from the limit for a space to split at.
				for(i = maxLine; (i > 0 && txt.charAt(i) !== " "); i--){
					// Do nothing, we're just looking for a space to split at.
				}
				if(!i){
					// Couldn't find a split going back, so go forward.
					for(i = maxLine; (i < txt.length && txt.charAt(i) !== " "); i++){
						// Do nothing, we're just looking for a space to split at.
					}
				}
				var line = dojo.trim(txt.substring(0, i));
				// Shift the text up to the next chunk, skipping the space that
				// was split on.  substring clamps, so this is "" at the end.
				txt = dojo.trim(txt.substring(i + 1));
				if(line){
					line = prefix + line + "\n";
				}
				lines.push(line);
			}else{
				// What is left fits, so use it as is.
				lines.push(prefix + txt + "\n");
				txt = null;
			}
		}
		return lines.join("");
	};

	var processScriptText = function(txt){
		// summary:
		//		Turns the &quot;, &gt;, &lt; and &amp; entities (in any
		//		letter case) found in script text back into the characters
		//		they stand for.
		// txt:
		//		The script text.  Falsy values are returned untouched.
		if(!txt){
			return txt;
		}
		// Order matters: &amp; goes last so that text such as "&amp;lt;"
		// ends up as "&lt;" and is not unescaped a second time.
		var unescapes = [
			[/&quot;/gi, "\""],
			[/&gt;/gi, ">"],
			[/&lt;/gi, "<"],
			[/&amp;/gi, "&"]
		];
		var k;
		for(k = 0; k < unescapes.length; k++){
			txt = txt.replace(unescapes[k][0], unescapes[k][1]);
		}
		return txt;
	};

	var formatScript = function(txt){
		// summary:
		//		Rudimentary formatting of script text.  Not perfect, but it
		//		gets some level of organization in there.
		// description:
		//		Entities are unescaped through processScriptText, every line
		//		is trimmed, and each non-blank line is written at the current
		//		document indent plus one extra level per unclosed "{".  Braces
		//		are counted character by character, so braces inside string
		//		literals or comments are counted as well.  Blank lines are
		//		dropped from the output.
		// txt:
		//		The script text to try to format a bit.  Falsy values are
		//		returned untouched.
		if(txt){
			txt = processScriptText(txt);
			var scriptLines = txt.split("\n");
			var newLines = [];
			// Number of "{" seen so far that have not been closed.  (Named so
			// it does not shadow the indent() helper of the enclosing scope.)
			var braceDepth = 0;
			var i, c, t, ch, line, lineLevel, prefix;
			for(i = 0; i < scriptLines.length; i++){
				line = dojo.trim(scriptLines[i]);
				if(!line){
					// Blank line: nothing to emit.
					continue;
				}
				// A line is written at the depth in effect before it, unless
				// it contains a "}", which pulls the line itself back out.
				lineLevel = braceDepth;
				for(c = 0; c < line.length; c++){
					ch = line.charAt(c);
					if(ch === "{"){
						braceDepth++;
					}else if(ch === "}"){
						braceDepth--;
						// We want to back up a bit before the
						// line is written.
						lineLevel = braceDepth;
					}
				}
				prefix = "";
				for(t = 0; t < indentDepth + lineLevel; t++){
					prefix += iTxt;
				}
				newLines.push(prefix + line + "\n");
			}
			// Okay, create the script text, hopefully reasonably
			// formatted.
			txt = newLines.join("");
		}
		return txt;
	};

	var openTag = function(node){
		// summary:
		//		Writes the start tag of an element.
		// description:
		//		The start tag is taken from the node's own markup and cleaned
		//		up: unquoted attribute values get quotes, the style attribute
		//		is tidied (property names lower-cased, declarations sorted,
		//		empty style attributes dropped) and the attributes are sorted.
		//		The matching entries are pushed on the closeTags and
		//		inlineStyle stacks for closeTag to consume.  Inline formatting
		//		tags are added to the pending text; any other tag flushes the
		//		pending text, is written on its own indented line and deepens
		//		the indent.
		// node:
		//		The element being opened.
		var tagName = node.nodeName.toLowerCase();
		// The element's own markup (start tag plus end tag, if it has one).
		var markup = dojo.trim(outerHTML(node));
		var startTag = markup.substring(0, markup.indexOf(">") + 1);

		// Thanks to IE, attribute values may be missing their quotes;
		// insert them.
		startTag = startTag.replace(rgxp_fixIEAttrs, '="$1"$2');

		// Normalize the style attribute: IE changes the casing and the
		// trailing semi-colon and webkit adds spaces.
		startTag = startTag.replace(rgxp_styleMatch, function(styleAttr){
			// First six characters are the 'style=' part, as written.
			var attrPrefix = styleAttr.substring(0, 6);
			var value = styleAttr.substring(6, styleAttr.length);
			var quote = value.charAt(0);
			// Strip the surrounding quote characters before splitting.
			var rules = dojo.trim(value.substring(1, value.length - 1)).split(";");
			var cleaned = [];
			var r, rule, colon;
			for(r = 0; r < rules.length; r++){
				rule = dojo.trim(rules[r]);
				if(rule){
					// Lower case the property name, leave the value alone.
					colon = rule.indexOf(":");
					cleaned.push(rule.substring(0, colon).toLowerCase() + rule.substring(colon, rule.length));
				}
			}
			cleaned.sort();

			// Reassemble the declarations in sorted order.
			var joined = cleaned.join("; ");
			var check = dojo.trim(joined);
			if(!check || check === ";"){
				// Nothing left, so remove the empty style attribute.
				return "";
			}
			return attrPrefix + quote + joined + ";" + quote;
		});

		// Collect the attributes so they can be written back in sorted order.
		var found = startTag.match(rgxp_attrsMatch) || [];
		var attrList = [];
		var a;
		for(a = 0; a < found.length; a++){
			attrList.push(dojo.trim(found[a]));
		}
		attrList.sort();

		// Rebuild the tag from the name and the sorted attributes.
		var rebuilt = "<" + tagName;
		if(attrList.length){
			rebuilt += " " + attrList.join(" ");
		}

		// An end tag in the node's markup means the element has to be closed
		// later on; otherwise terminate it right away (with ' />' for xhtml).
		var hasEndTag = (markup.indexOf("</") != -1);
		if(hasEndTag){
			rebuilt += ">";
			closeTags.push(tagName);
		}else{
			rebuilt += (xhtml ? " />" : ">");
			closeTags.push(false);
		}

		var inline = isInlineFormat(tagName);
		inlineStyle.push(inline);
		if(inline){
			// Inline tags stay part of the running text.
			textContent += rebuilt;
			return;
		}

		if(textContent){
			// Write out the text that came before this non-inline tag.
			content.push(formatText(textContent));
			textContent = "";
		}
		indent();
		content.push(rebuilt);
		newline();
		indentDepth++;
	};
	
	var closeTag = function(){
		// summary:
		//		Closes the most recently opened element, if it needs closing.
		// description:
		//		Pops the entries openTag pushed on the inlineStyle and
		//		closeTags stacks.  For a non-inline element the pending text
		//		is flushed first and the indent is reduced by one level, and
		//		the end tag (if there is one) is written on its own line.
		//		For an inline element the end tag is added to the pending
		//		text.
		var wasInline = inlineStyle.pop();
		var pendingName = closeTags.pop();

		if(!wasInline && textContent){
			// Write out the text that came before the end of this element.
			content.push(formatText(textContent));
			textContent = "";
		}

		if(!pendingName){
			// No end tag to write; just undo the indent added by openTag.
			indentDepth--;
			return;
		}

		var endTag = "</" + pendingName + ">";
		if(wasInline){
			textContent += endTag;
			return;
		}

		indentDepth--;
		indent();
		content.push(endTag);
		newline();
	};

	var processCommentNode = function(n){
		// summary:
		//		Writes a comment node to the output.
		// description:
		//		The comment delimiters each go on their own line at the
		//		current indent, with the comment text formatted one level
		//		deeper between them.
		// n:
		//		The comment node to process.

		// Decode first so the contents aren't double-encoded.
		var body = decode(n.nodeValue, map);

		indent();
		content.push("<!--");
		newline();

		// Format the body one level in, then restore the depth.
		indentDepth++;
		var formattedBody = formatText(body);
		indentDepth--;
		content.push(formattedBody);

		indent();
		content.push("-->");
		newline();
	};

	var processNode = function(node) {
		// summary:
		//		Entry point of the DOM walk: writes out every child of the
		//		given node, recursing into child elements.
		// description:
		//		Elements are opened with openTag and closed with closeTag.
		//		In between, script content goes through formatScript, 'pre'
		//		content is copied as is (with a Mozilla fix-up and a final
		//		newline), and any other element is processed recursively.
		//		Text and CDATA nodes go to processTextNode, comments to
		//		processCommentNode.  Other node types are ignored.
		// node:
		//		The node whose children should be processed.
		var children = node.childNodes;
		if(!children){
			return;
		}
		var i, n, tg, preTxt;
		for(i = 0; i < children.length; i++){
			n = children[i];
			if(n.nodeType === 1){
				tg = dojo.trim(n.tagName.toLowerCase());
				if(dojo.isIE && n.parentNode != node){
					// IE is broken.  DOMs are supposed to be a tree.
					// But in the case of malformed HTML, IE generates a graph
					// meaning one node ends up with multiple references
					// (multiple parents).  This is totally wrong and invalid, but
					// such is what it is.  We have to keep track and check for
					// this because otherwise the source output HTML will have dups.
					continue;
				}
				if(tg && tg.charAt(0) === "/"){
					// IE oddity.  Malformed HTML can put in odd tags like:
					// </ >, </span>.  It treats a mismatched closure as a new
					// start tag.  So, remove them.
					continue;
				}

				// Process non-dup, seemingly wellformed elements!
				openTag(n);
				if(tg === "script"){
					content.push(formatScript(n.innerHTML));
				}else if(tg === "pre"){
					preTxt = n.innerHTML;
					if(dojo.isMoz){
						// Mozilla screws this up, so fix it up.  Regexps with
						// the 'g' flag are needed here: replace() with a
						// string pattern only touches the first occurrence,
						// which would leave every later <br> in the output.
						preTxt = preTxt.replace(/<br>/g, "\n");
						preTxt = preTxt.replace(/<pre>/g, "");
						preTxt = preTxt.replace(/<\/pre>/g, "");
					}
					// Add ending newline, if needed.
					if(preTxt.charAt(preTxt.length - 1) !== "\n"){
						preTxt += "\n";
					}
					content.push(preTxt);
				}else{
					processNode(n);
				}
				closeTag();
			}else if(n.nodeType === 3 || n.nodeType === 4){
				// Text and CDATA sections.
				processTextNode(n);
			}else if(n.nodeType === 8){
				processCommentNode(n);
			}
		}
	};

	//Okay, finally process the input string.
	processNode(contentDiv);
	if(textContent){
		// Insert any trailing text.  See: #10854
		content.push(formatText(textContent));
		textContent = "";
	}
	return content.join(""); //String
};

}
#	Change	User	Description
#1	16170	perforce_software	Move Chronicle files to follow new path scheme for branching.
//guest/perforce_software/chronicle/collateral/dojo-debug/dojox/html/format.js
#1	8972	Matt Attaway	Initial add of the Chronicle source code