<?php
/*
Copyright (C) 2006-2007 Mihai Şucan
http://www.robodesign.ro/

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

// Last update: 2007-06-19

// Configuration array handed back to whichever script includes this file.
// Every top-level key appears exactly once: PHP silently keeps only the last
// value of a duplicated key in an array literal, so placeholder examples are
// kept in comments rather than as shadowed entries.
return array(
	// add the retidy mark to <head>
	'retidy_mark' => true,

	'encoding' => 'utf-8',

	// execution macro: this defines the methods you want to call, each and every one of these "cleans" something
	// call the methods as many times as you want, change the order as needed
	'macro' => array(/*'pre_tidy_regex',*/ 'tidy_code', 'remove_nodes', 'my_strip_tags', 'strip_lang', 'strip_br_dupes', 'trim_br_tags', 'replace_tags', 'tidy_code', 'dom_init', /*'dom_regenerate_tables',*/ 'dom_fix_text_tags', 'dom_fix_headings', 'dom_strip_child_tags', 'dom_strip_attrs', 'dom_strip_only_child', 'dom_strip_parent_only_child', /*'dom_merge_parent_attr', 'dom_strip_no_attr',*/ 'dom_save', 'post_dom_stripme', 'post_dom_renametag', 'strip_empty_tags', 'combine_inline', 'reorder_tags', 'combine_inline', 'combine_br_tags', 'trim_br_tags', 'fix_img_pos', 'extend_quotes', 'combine_broken_tags', 'hruler', 'tidy_code', 'trim_br_tags', 'dom_parse_lists', /*'final_regex', 'dom_toc_add' */),

	// important methods that, during execution, must not fail
	// if they fail (return false), the entire execution is halted
	'important_methods' => array('tidy_code', 'dom_init', 'dom_save'),

	// the methods listed here will be run exhaustively, until they return -1
	'loop_methods' => array(),

	'loop_max' => 10,

	// which HTML Tidy to use? Possible values:
	// 'php' - the htmltidy module from PHP
	// 'anything else' - you can provide a command to execute (the document will be sent via STDIN and the result is expected via STDOUT)
	'htmltidy_app' => 'php',

	// execute the following search and replace prior to the first execution of tidy
	// currently disabled (false); note that 'pre_tidy_regex' is also commented out in 'macro' above
	// to provide rules, replace false with an array shaped like:
	//   array(
	//       'name' => array('search', 'replace'), // the name is used just for verbose output
	//   )
	'pre_tidy_regex' => false,

	// execute the following search and replace after all cleanup
	// currently disabled (false); note that 'final_regex' is also commented out in 'macro' above
	// to provide rules, replace false with an array shaped like:
	//   array(
	//       'name' => array('search', 'replace'),
	//   )
	'final_regex' => false,

	// for unknown reason PHP DOM won't removeAttribute('lang'), so we have to use regex instead
	'strip_lang' => true,
	'strip_xmllang' => true,
	'show_tidy_errors' => false,

	/*
	This regenerates stupid tables.

	From:

	<table>
	<tr>
	<td>
	<any>blah 1</any>
	blah 2<br />
	blah 3</td>
	<td>
	<any>boom 1</any>
	boom 2<br />
	boom 3</td>
	</tr>
	</table>

	To:

	<table>
	<tr>
	<td>blah 1</td>
	<td>boom 1</td>
	</tr>
	<tr>
	<td>blah 2</td>
	<td>boom 2</td>
	</tr>
	<tr>
	<td>blah 3</td>
	<td>boom 3</td>
	</tr>
	</table>
	*/
	'regenerate_tables' => false,
	'regenerate_tables_tr' => array('br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', ),

	// replace duplicated <br> tags with one <br>
	'strip_br_dupes' => true,

	// trim any <br> in:
	// </p> <br> blah blah
	// blah blah <br> <p>
	// <p> <br> blah blah
	'trim_br_tags' => array('p', 'div', 'table', 'th', 'td', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol', 'dl', 'li', 'dt', 'dd', 'strong', 'em', 'i', 'u', 'b', 'tr', 'pre'),

	// completely remove nodes, including the content
	'remove_nodes' => array('style', 'script'),

	// given tags A and B. if B is the only child of A, then B is stripped.
	// from: <h1><em>haha</em></h1>
	// to: <h1>haha</h1>
	'strip_only_child' => array(
		// 'parent' => array('child')
		'h1' => array('em', 'strong'),
		'h2' => array('em', 'strong'),
		'h3' => array('em', 'strong'),
		'h4' => array('em', 'strong'),
		'h5' => array('em', 'strong'),
		'h6' => array('em', 'strong'),
		),

	// given tags A and B. if B is the only child of A, then A is stripped.
	// from: <h1><img src="img.png" /></h1>
	// to: <img src="img.png" />
	'strip_parent_only_child' => array(
		// 'parent' => array('child')
		'h1' => array('img'),
		'h2' => array('img'),
		'h3' => array('img'),
		'h4' => array('img'),
		'h5' => array('img'),
		'h6' => array('img'),
	//	'p' => array('img'),
		),

	'strip_tags' => array(
		// 'parent' => array('child')
		'*' => array('span', /*'dl', 'dt', 'dd', 'div', */ 'font', 'col', 'colgroup', 'strong'),
		'td' => array('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', ),
		'li' => array('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', ),
		'h1' => array('strong', ),
		'h2' => array('strong', ),
		'h3' => array('strong', ),
		'h4' => array('strong', ),
		'h5' => array('strong', ),
		'h6' => array('strong', ),

		// merge tags
		'strong' => array('strong'),
		'em' => array('em'),
		'u' => array('u'),
		'i' => array('i'),
		'b' => array('b'),
		'p' => array('p'),
		),

	'strip_attrs' => array(
		// tag => array('attributes')
		'*' => array(
			'lang', /*'xml:lang',*/ 'color', 'style', 'class', 'width', 'height', 'id', 'name', 'border', 'dir', 'valign', 'clear', 'align', 'frame'),
		'img' => array(
			'hspace',
			'vspace',
			'alt',
			'align',
			),
		'table' => array(
			'cellpadding',
			'cellspacing',
			'rules',
			),
		),

	'strip_empty_tags' => array('p', 'span', 'div', 'title', 'style', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'b', 'i', 'em', 'strong', 'sup', 'sub', 'u', 'td', 'tr', 'table', 'script'),

	// fix inline tags like:
	// from: <em>hahaa</em>, <em>cucu</em>
	// to: <em>hahaa, cucu</em>
	'combine_inline' => array('em', 'strong', 'u', 'i', 'b', 'sup', 'sub'),
	'combine_inline_chars' => '(([\s,.:;\'"\[\]\(\)\+=_\-\`\\\\\/~!@#\$%\^*&\{\}]*|(&#?[a-z0-9]*;)*)*)',

	// fix <ul>...</ul> <br> <ul>...</ul>
	'combine_br_tags' => array('ul', 'ol', 'dl'),

	// fix mangled tags like:
	// from: <strong><em>hahaa</em></strong>, <em>cucu</em>
	// to: <em><strong>hahaa</strong>, cucu</em>
	'reorder_tags' => array('em', 'strong', 'u', 'i', 'b', 'sup', 'sub'),

	'replace_tags' => array('u' => 'strong',
		'b' => 'strong',
		'i' => 'em',
		'dt' => 'p',
		'dd' => 'p',
		'dl' => 'p',
		'div' => 'p',
		),

	// merge parent attributes
	// from: <font face="blah"><font size="7">ugly</font></font>
	// to: <font face="blah" size="7">still too ugly</font>
	'merge_parent_attr' => array('font'),

	// horizontal ruler
	// from <p>***</p> to <hr />
	'hruler' => array('p', 'div'),

	// strip tags with no attribute
	'strip_no_attr' => array('div', 'span'),

	// fix punctuation in text
	// From: I like this ,you know ( what ? !) .
	// To: I like this, you know (what?!).
	'fix_text_tags' => array('p', 'li', 'em', 'strong', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'i', 'u', 'b', 'td', 'th', 'dt', 'dd'),

	// $fix_text_tags = false;

	// fix badly positioned <img>s
	// From: <p>th<img>is really sucks</p>
	// To: <p><img>this doesn't suck</p>
	'fix_img_pos' => true,

	// Extend tags around quotes
	// From: &quot;<em>blah&quot;</em>
	// To: <em>&quot;blah&quot;</em>
	'extend_quotes' => true,

	/*
	Parse (un)ordered lists:
	- haha
	- haha

	1. haha
	2. haha

	"Fuzzy list follows:

	haha;
	cucu;
	super."

	Chars: any non-alphanum char, such as * - . ) and any other unicode char needed

	Numbers: a, b, c, 1, 2, 3

	Contexts:

		<p>- haha</p><p>- haha</p>
	*/
	'parse_lists' => true,

	// tags which can be considered as list items
	// the generated list items will not contain the characters (the bullets, nor the numbering chars)
	'parse_lists_tags' => array('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'),

	// tags in which the code searches for lists
	'parse_lists_container_tags' => array('body', 'div'),

	// fuzzy lists are those which have no "bullets" (special chars), and have no numbering
	// fuzzy lists are detected only if there's a node which has a colon at the end of the textContent, nodeName must be p.
	// all the list items in the fuzzy list must end with a semi-colon or a dot.
	// all the list items must have the first alphabetic character with the same case, altering the case can end the list.
	// all fuzzy lists are generated as unordered lists
	// fuzzy lists end when two consecutive nodes ending with a dot are found. the second node is not included in the list.
	'parse_fuzzy_lists' => true,

	// generate unordered lists even when only one list item was found
	'parse_ulist_1li' => false,

	// if this option is active, the list items found as good will also be searched for other sub-items.
	// example: <p>*) elem1 *) elem2</p>
	// results: <li>elem1</li><li>elem2</li>
	// otherwise: <li>elem1 *) elem2</li>
	'parse_ulist_inline' => true,

	// call the combine_br_tags() function for more code cleanup
	'parse_lists_combine_br_tags' => true,

	// this generates tables from: <p>blah\tbloom\tcucu</p> to: <tr><td>blah</td><td>bloom</td><td>cucu</td></tr>
	// !!! not (yet) working due to indentation auto-generated by OpenOffice and due to white-space removal from htmltidy
	// can be made to work with other chars, something like CSV
	// this functionality is included here because the algorithm for parsing is similar
	'parse_table_tabs' => false,

	'tidy_after_parse_lists' => true,

	/* combine broken paragraphs (tags)
	From: <p>I think today is</p> <p>is a very beautiful day.</p>
	To: <p>I think today is a beautiful day.</p>
	*/
	'combine_broken_tags' => array('p', /* 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' */ ),

	// fix heading numbering
	// from: <h1>blah</h1> ... blah ... <h6>blah</h6>
	// to: <h1>blah</h1> ... blah ... <h2>blah</h2>
	'fix_headings' => true,

	// generate Table of Contents for all headers, after running *everything*, including $final_regex
	// this will be an UL added in the body, at the top
	// if this is true, $fix_headings will be also enforced (TOC generator needs fixed headings)
	'add_toc' => false,

	// if false == will generate multiple ULs, for different level of headings
	// if true == one boring UL
	'toc_flat' => false,

	// generate IDs and links
	// '#text' = generate IDs based on textContent
	// 'anything' = generate numeric IDs with 'anything' as prefix
	// false = don't generate any IDs
	'toc_autolink' => '#text',

	'tidy_after_toc' => true,

	// the htmltidy configuration
	// last update: 2007-05-07
	'htmltidy' => array(
		'add-xml-decl' => 1,
		'add-xml-space' => 0,
		'bare' => 1,
		'clean' => 1,
		'doctype' => 'strict',
		'drop-empty-paras' => 1,
		'drop-font-tags' => 1,
		'drop-proprietary-attributes' => 1,
		'enclose-text' => 1,
		'escape-cdata' => 0,
		'fix-backslash' => 1,
		'fix-bad-comments' => 1,
		'fix-uri' => 1,
		'hide-comments' => 1,
		'hide-endtags' => 0,
		'indent-cdata' => 0,
		'input-xml' => 0,
		'join-classes' => 1,
		'join-styles' => 1,
		'literal-attributes' => 0,
		'logical-emphasis' => 1,
		'lower-literals' => 1,
		'merge-divs' => 1,
		'ncr' => 1,
		'numeric-entities' => 1,
		'output-html' => 0,
		'output-xhtml' => 1,
		'output-xml' => 0,
		'quote-ampersand' => 1,
		'quote-marks' => 0,
		'quote-nbsp' => 1,
		'repeated-attributes' => 'keep-last',
		'replace-color' => 0,
		'show-body-only' => 0,
		'uppercase-attributes' => 0,
		'uppercase-tags' => 0,
		'word-2000' => 1,
		'break-before-br' => 1,
		'indent' => 1,
		'indent-attributes' => 0,
		'indent-spaces' => 0,
		'markup' => 1,
		'tab-size' => 2,
		'vertical-space' => 1,
		'wrap-asp' => 0,
		'wrap-attributes' => 0,
		'wrap-php' => 0,
		'wrap-jste' => 0,
		'wrap-script-literals' => 0,
		'wrap-sections' => 0,
		'newline' => 'LF',
		'output-bom' => 0,
		'input-encoding' => 'utf8',
		'output-encoding' => 'utf8',
		'force-output' => 1,
		'tidy-mark' => 0,
	),
);

// The closing PHP tag is intentionally omitted so that no trailing whitespace
// can be sent as output when this file is included.
