<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Markup
 * @subpackage Parser
 * @copyright  Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: Textile.php 24594 2012-01-05 21:27:01Z matthew $
 */
/**
 * @see Zend_Markup_TokenList
 */
require_once 'Zend/Markup/TokenList.php';
/**
 * @see Zend_Markup_Parser_ParserInterface
 */
require_once 'Zend/Markup/Parser/ParserInterface.php';
/**
 * @category   Zend
 * @package    Zend_Markup
 * @subpackage Parser
 * @copyright  Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Markup_Parser_Textile implements Zend_Markup_Parser_ParserInterface
{
    /**
     * Tokenizer states:
     * - STATE_SCAN:          scanning inline content (text, inline tags, newlines)
     * - STATE_NEW_PARAGRAPH: at the start of a paragraph, looking for a block prefix
     * - STATE_NEWLINE:       directly after a single newline, looking for a block prefix
     */
    const STATE_SCAN          = 0;
    const STATE_NEW_PARAGRAPH = 1;
    const STATE_NEWLINE       = 2;

    /**
     * Regex fragments for the attribute modifiers that may follow a tag:
     * "(class#id)", "{style}", "[lang]" and the alignment markers.
     */
    const MATCH_ATTR_CLASSID = '\((?<attr_class>[a-zA-Z0-9_]+)?(?:\#(?<attr_id>[a-zA-Z0-9_]+))?\)';
    const MATCH_ATTR_STYLE   = "\{(?<attr_style>[^\}\n]+)\}";
    const MATCH_ATTR_LANG    = '\[(?<attr_lang>[a-zA-Z_]+)\]';
    const MATCH_ATTR_ALIGN   = '(?<attr_align>\<\>?|\>|=)';

    /**
     * Token tree
     *
     * @var Zend_Markup_TokenList
     */
    protected $_tree;

    /**
     * Current token
     *
     * @var Zend_Markup_Token
     */
    protected $_current;

    /**
     * Source to tokenize
     *
     * @var string
     */
    protected $_value = '';

    /**
     * Length of the value in bytes
     *
     * Bytes (not characters) are used because the pointer is advanced with
     * strlen() and passed to preg_match() as a byte offset.
     *
     * @var int
     */
    protected $_valueLen = 0;

    /**
     * Current pointer (byte offset into the value)
     *
     * @var int
     */
    protected $_pointer = 0;

    /**
     * The buffer
     *
     * @var string
     */
    protected $_buffer = '';

    /**
     * Pending paragraph token that is completed when the next paragraph starts
     *
     * @var array
     */
    protected $_temp = array();

    /**
     * Simple tag translation
     *
     * @var array
     */
    protected $_simpleTags = array(
        '*'  => 'strong',
        '**' => 'bold',
        '_'  => 'emphasized',
        '__' => 'italic',
        '??' => 'citation',
        '-'  => 'deleted',
        '+'  => 'insert',
        '^'  => 'superscript',
        '~'  => 'subscript',
        '%'  => 'span',
        // these are a little more complicated
        '@'  => 'code',
        '!'  => 'img',
    );

    /**
     * Token array
     *
     * @var array
     */
    protected $_tokens = array();

    /**
     * Prepare the parsing of a Textile string; the tokenizing is done in
     * {@link _tokenize()} and the tree is built in {@link _createTree()}
     *
     * @param string $value
     *
     * @throws Zend_Markup_Parser_Exception If the value is not a string or is an empty string
     * @return Zend_Markup_TokenList
     */
    public function parse($value)
    {
        if (!is_string($value)) {
            /**
             * @see Zend_Markup_Parser_Exception
             */
            require_once 'Zend/Markup/Parser/Exception.php';
            throw new Zend_Markup_Parser_Exception('Value to parse should be a string.');
        }

        // compare against '' instead of using empty(): the string "0" is valid input
        if ($value === '') {
            /**
             * @see Zend_Markup_Parser_Exception
             */
            require_once 'Zend/Markup/Parser/Exception.php';
            throw new Zend_Markup_Parser_Exception('Value to parse cannot be left empty.');
        }

        // first make sure we only have LF newlines, also trim the value
        $this->_value = str_replace(array("\r\n", "\r"), "\n", $value);
        $this->_value = trim($this->_value);

        // initialize variables and tokenize; the length is measured in bytes
        // so that it uses the same unit as the pointer
        $this->_valueLen = strlen($this->_value);
        $this->_pointer  = 0;
        $this->_buffer   = '';
        $this->_temp     = array();
        $this->_tokens   = array();

        $this->_tokenize();

        // create the tree
        $this->_tree     = new Zend_Markup_TokenList();
        $this->_current  = new Zend_Markup_Token('', Zend_Markup_Token::TYPE_NONE, 'Zend_Markup_Root');
        $this->_tree->addChild($this->_current);

        $this->_createTree();

        return $this->_tree;
    }

    /**
     * Tokenize a textile string
     *
     * Walks over the value with a small state machine and fills the token
     * array; nothing is returned.
     *
     * @return void
     */
    protected function _tokenize()
    {
        $state    = self::STATE_NEW_PARAGRAPH;

        $attrsMatch = implode('|', array(
            self::MATCH_ATTR_CLASSID,
            self::MATCH_ATTR_STYLE,
            self::MATCH_ATTR_LANG,
            self::MATCH_ATTR_ALIGN
        ));

        while ($this->_pointer < $this->_valueLen) {
            switch ($state) {
                case self::STATE_SCAN:
                    $matches = array(); //[^\n*_?+~%@!-]
                    $acronym = '(?<acronym>[A-Z]{2,})\((?<title>[^\)]+)\)';
                    $regex   = '#\G(?<text>.*?)(?:'
                             . "(?:(?<nl_paragraph>\n{2,})|(?<nl_break>\n))|"
                             . '(?<tag>'
                             . "(?<name>\*{1,2}|_{1,2}|\?{2}|\-|\+|\~|\^|%|@|!|$|{$acronym}"
                             . '|":(?<url>[^\s]+)|")'
                             . "(?:{$attrsMatch})*)"
                             . ')#si';

                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        // The pattern can always match up to the end of the
                        // input, so no match means PCRE gave up (for example
                        // because a limit was hit). Emit the remainder as
                        // plain text; otherwise the pointer would never
                        // advance and this loop would not terminate.
                        $this->_buffer .= substr($this->_value, $this->_pointer);
                        $this->_pointer = $this->_valueLen;
                        $this->_flushBuffer();
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    // strict comparison: a text run of "0" must not be dropped
                    if (isset($matches['text']) && ($matches['text'] !== '')) {
                        $this->_buffer .= $matches['text'];
                    }

                    // first add the buffer
                    $this->_flushBuffer();

                    if (!empty($matches['nl_paragraph'])) {
                        // remember the paragraph; it is completed in STATE_NEW_PARAGRAPH
                        $this->_temp = array(
                            'tag'        => $matches['nl_paragraph'],
                            'name'       => 'p',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );

                        $state = self::STATE_NEW_PARAGRAPH;
                    } elseif (!empty($matches['nl_break'])) {
                        $this->_tokens[] = array(
                            'tag'        => $matches['nl_break'],
                            'name'       => 'break',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );

                        $state   = self::STATE_NEWLINE;
                    } elseif (!empty($matches['tag'])) {
                        if (isset($this->_simpleTags[$matches['name']])) {
                            // now add the new token
                            $this->_tokens[] = array(
                                'tag'        => $matches['tag'],
                                'type'       => Zend_Markup_Token::TYPE_TAG,
                                'name'       => $this->_simpleTags[$matches['name']],
                                'attributes' => $this->_extractAttributes($matches)
                            );
                        } else {
                            $attributes = $this->_extractAttributes($matches);

                            if ($matches['tag'][0] == '"') {
                                // a link: either the opening quote or the closing '":url'
                                if (isset($matches['url'])) {
                                    $attributes['url'] = $matches['url'];
                                }

                                $this->_tokens[] = array(
                                    'tag'        => $matches['tag'],
                                    'type'       => Zend_Markup_Token::TYPE_TAG,
                                    'name'       => 'url',
                                    'attributes' => $attributes
                                );
                            } else {
                                // an acronym is emitted as open tag, text and close tag
                                $this->_tokens[] = array(
                                    'tag'        => '',
                                    'type'       => Zend_Markup_Token::TYPE_TAG,
                                    'name'       => 'acronym',
                                    'attributes' => array(
                                        'title' => $matches['title']
                                    )
                                );
                                $this->_tokens[] = array(
                                    'tag'  => $matches['acronym'],
                                    'type' => Zend_Markup_Token::TYPE_NONE
                                );
                                $this->_tokens[] = array(
                                    'tag'        => '(' . $matches['title'] . ')',
                                    'type'       => Zend_Markup_Token::TYPE_TAG,
                                    'name'       => 'acronym',
                                    'attributes' => array()
                                );
                            }
                        }

                        $state = self::STATE_SCAN;
                    }
                    break;

                case self::STATE_NEW_PARAGRAPH:
                    if (empty($this->_temp)) {
                        // the very first paragraph has no source text of its own
                        $this->_temp = array(
                            'tag'        => '',
                            'name'       => 'p',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );
                    } else {
                        // the first newline closes the previous paragraph, the
                        // remaining newlines belong to the new one
                        $this->_tokens[] = array(
                            'tag'        => "\n",
                            'name'       => 'p',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );

                        $this->_temp['tag'] = substr($this->_temp['tag'], 1);
                    }

                    $matches = array(); //[^\n*_?+~%@!-] (\()? [^()]+ (?(1)\))
                    $regex   = "#\G(?<name>(h[1-6]|p)|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#i";

                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        // no block prefix: a plain paragraph
                        $this->_tokens[] = $this->_temp;
                        $state    = self::STATE_SCAN;
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    if ($matches['name'] == 'p') {
                        // explicit "p." prefix: merge it into the pending paragraph
                        $this->_temp['tag']       .= $matches[0];
                        $this->_temp['attributes'] = $this->_extractAttributes($matches);

                        $this->_tokens[]    = $this->_temp;
                        $this->_temp = array();
                    } else {
                        // heading or list item inside the new paragraph
                        $this->_tokens[] = $this->_temp;
                        $this->_temp = array();

                        $this->_tokens[] = $this->_createBlockToken($matches);
                    }

                    $state = self::STATE_SCAN;
                    break;

                case self::STATE_NEWLINE:
                    $matches = array(); //[^\n*_?+~%@!-]
                    $regex   = "#\G(?<name>(h[1-6])|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#si";

                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        $state = self::STATE_SCAN;
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    // the state is left unchanged here, as before
                    $this->_tokens[] = $this->_createBlockToken($matches);
                    break;
            }
        }
    }

    /**
     * Add the buffered text (if any) to the token array as a text token
     * and reset the buffer
     *
     * @return void
     */
    protected function _flushBuffer()
    {
        // strict comparison: a buffer holding "0" is still text
        if ($this->_buffer !== '') {
            $this->_tokens[] = array(
                'tag'  => $this->_buffer,
                'type' => Zend_Markup_Token::TYPE_NONE
            );
            $this->_buffer = '';
        }
    }

    /**
     * Create the token for a matched heading or list item prefix
     *
     * '#' and '*' are both translated to a 'list' token, where '#'
     * additionally gets the attribute list=decimal.
     *
     * @param array $matches Matches of a block prefix regex
     *
     * @return array
     */
    protected function _createBlockToken(array $matches)
    {
        $name       = $matches['name'];
        $attributes = $this->_extractAttributes($matches);

        if ($name == '#') {
            $name               = 'list';
            $attributes['list'] = 'decimal';
        } elseif ($name == '*') {
            $name = 'list';
        }

        return array(
            'tag'        => $matches[0],
            'name'       => $name,
            'type'       => Zend_Markup_Token::TYPE_TAG,
            'attributes' => $attributes
        );
    }

    /**
     * Create a tree from the tokenized text
     *
     * @return void
     */
    protected function _createTree()
    {
        // tags are only opened while $inside is true; it is switched off
        // after an 'img' or 'url' tag and switched on again by a stopper
        $inside = true;

        foreach ($this->_tokens as $key => $token) {
            // first check if the token is a stopper
            if ($this->_isStopper($token, $this->_current)) {
                if ($this->_current->getName() == 'li') {
                    // list items are handled differently
                    if (isset($this->_tokens[$key + 1])
                        && ($this->_tokens[$key + 1]['type'] == Zend_Markup_Token::TYPE_TAG)
                        && ($this->_tokens[$key + 1]['name'] == 'list')
                    ) {
                        // the next item is a correct tag
                        $this->_current->setStopper($token['tag']);

                        $this->_current = $this->_current->getParent();
                    } else {
                        // close the list
                        $this->_current->setStopper($token['tag']);

                        $this->_current = $this->_current->getParent()->getParent();

                        // go up in the tree until we found the end
                        while ($this->_isStopper($token, $this->_current)) {
                            $this->_current->setStopper($token['tag']);

                            $this->_current = $this->_current->getParent();
                        }
                    }
                } else {
                    // go up in the tree until we found the end of stoppers
                    while ($this->_isStopper($token, $this->_current)) {
                        $this->_current->setStopper($token['tag']);

                        // attributes on the closing token are copied to the tag
                        if (!empty($token['attributes'])) {
                            foreach ($token['attributes'] as $name => $value) {
                                $this->_current->addAttribute($name, $value);
                            }
                        }

                        $this->_current = $this->_current->getParent();
                    }
                }
                $inside = true;
            } elseif (($token['type'] == Zend_Markup_Token::TYPE_TAG) && $inside) {
                if ($token['name'] == 'break') {
                    // add the newline and continue parsing
                    $this->_current->addChild(new Zend_Markup_Token(
                        $token['tag'],
                        Zend_Markup_Token::TYPE_NONE,
                        '',
                        array(),
                        $this->_current
                    ));
                } else {
                    // handle a list item
                    if ($token['name'] == 'list') {
                        $attributes = array();
                        if (isset($token['attributes']['list'])) {
                            // the list type belongs to the list, not to the item
                            $attributes['list'] = $token['attributes']['list'];
                            unset($token['attributes']['list']);
                        }

                        if ($this->_current->getName() != 'list') {
                            // the list isn't started yet, create it
                            $child = new Zend_Markup_Token(
                                '',
                                Zend_Markup_Token::TYPE_TAG,
                                'list',
                                $attributes,
                                $this->_current
                            );

                            $this->_current->addChild($child);

                            $this->_current = $child;
                        }
                        $token['name'] = 'li';
                    } elseif (($token['name'] == 'img') || ($token['name'] == 'url')) {
                        $inside = false;
                    }

                    // add the token
                    $child = new Zend_Markup_Token(
                        $token['tag'],
                        Zend_Markup_Token::TYPE_TAG,
                        $token['name'],
                        $token['attributes'],
                        $this->_current
                    );

                    $this->_current->addChild($child);

                    $this->_current = $child;
                }
            } else {
                // simply add the token as text
                $this->_current->addChild(new Zend_Markup_Token(
                    $token['tag'],
                    Zend_Markup_Token::TYPE_NONE,
                    '',
                    array(),
                    $this->_current
                ));
            }
        }
    }

    /**
     * Check if a tag is a stopper
     *
     * Headings, lists and list items are stopped by a 'break' or 'p' tag,
     * a 'break' is never stopped, and every other tag is stopped by a tag
     * with the same name.
     *
     * @param array $token
     * @param Zend_Markup_Token $current
     *
     * @return bool
     */
    protected function _isStopper(array $token, Zend_Markup_Token $current)
    {
        switch ($current->getName()) {
            case 'h1':
            case 'h2':
            case 'h3':
            case 'h4':
            case 'h5':
            case 'h6':
            case 'list':
            case 'li':
                if (($token['type'] == Zend_Markup_Token::TYPE_TAG)
                    && (($token['name'] == 'break') || ($token['name'] == 'p'))
                ) {
                    return true;
                }
                break;
            case 'break':
                return false;
            default:
                if (($token['type'] == Zend_Markup_Token::TYPE_TAG) && ($token['name'] == $current->getName())) {
                    return true;
                }
                break;
        }

        return false;
    }

    /**
     * Extract the attributes
     *
     * Reads the named groups 'attr_class', 'attr_id', 'attr_style',
     * 'attr_lang' and 'attr_align' and translates them to the attributes
     * 'class', 'id', 'style', 'lang' and 'align'.
     *
     * @param array $matches
     *
     * @return array
     */
    protected function _extractAttributes(array $matches)
    {
        $attributes = array();

        $groups = array(
            'attr_class' => 'class',
            'attr_id'    => 'id',
            'attr_style' => 'style',
            'attr_lang'  => 'lang',
        );

        foreach ($groups as $group => $attribute) {
            // strict comparison: "0" is a value that must be kept
            if (isset($matches[$group]) && ($matches[$group] !== '')) {
                $attributes[$attribute] = $matches[$group];
            }
        }

        if (!empty($matches['attr_align'])) {
            switch ($matches['attr_align']) {
                case '=':
                    $attributes['align'] = 'center';
                    break;
                case '>':
                    $attributes['align'] = 'right';
                    break;
                case '<>':
                    $attributes['align'] = 'justify';
                    break;
                case '<':
                default:
                    $attributes['align'] = 'left';
                    break;
            }
        }

        return $attributes;
    }
}