web/lib/Zend/Search/Lucene/Document/Html.php
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Tue, 15 Mar 2011 16:32:22 +0100
changeset 68 ecaf28ffe26e
parent 0 web/Zend/Search/Lucene/Document/Html.php@4eba9c11703f
parent 64 web/Zend/Search/Lucene/Document/Html.php@162c1de6545a
child 207 621fa6caec0c
permissions -rw-r--r--
Merge with cc1eea280cdb9d27ecdc9a2898de7a2b9835cde7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     1
<?php
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     2
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     3
 * Zend Framework
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     4
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     5
 * LICENSE
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     6
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     7
 * This source file is subject to the new BSD license that is bundled
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     8
 * with this package in the file LICENSE.txt.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     9
 * It is also available through the world-wide-web at this URL:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    10
 * http://framework.zend.com/license/new-bsd
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    11
 * If you did not receive a copy of the license and are unable to
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    12
 * obtain it through the world-wide-web, please send an email
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    13
 * to license@zend.com so we can send you a copy immediately.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    14
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    15
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    16
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    17
 * @subpackage Document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    18
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    19
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    20
 * @version    $Id: Html.php 23392 2010-11-19 09:53:16Z ramon $
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    21
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    22
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    23
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    24
/** Zend_Search_Lucene_Document */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    25
require_once 'Zend/Search/Lucene/Document.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    26
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    27
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    28
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    29
 * HTML document.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    30
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    31
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    32
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    33
 * @subpackage Document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    34
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    35
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    36
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    37
class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    38
{
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    39
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    40
     * List of document links
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    41
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    42
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    43
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    44
    private $_links = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    45
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    46
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    47
     * List of document header links
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    48
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    49
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    50
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    51
    private $_headerLinks = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    52
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    53
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    54
     * Stored DOM representation
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    55
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    56
     * @var DOMDocument
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    57
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    58
    private $_doc;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    59
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    60
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    61
     * Exclud nofollow links flag
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    62
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    63
     * If true then links with rel='nofollow' attribute are not included into
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    64
     * document links.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    65
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    66
     * @var boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    67
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    68
    private static $_excludeNoFollowLinks = false;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    69
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    70
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    71
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    72
     * List of inline tags
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    73
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    74
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    75
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    76
    private $_inlineTags = array('a', 'abbr', 'acronym', 'dfn', 'em', 'strong', 'code',
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    77
                                'samp', 'kbd', 'var', 'b', 'i', 'big', 'small', 'strike',
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    78
                                'tt', 'u', 'font', 'span', 'bdo', 'cite', 'del', 'ins',
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    79
                                'q', 'sub', 'sup');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    80
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    81
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    82
     * Object constructor
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    83
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    84
     * @param string  $data         HTML string (may be HTML fragment, )
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    85
     * @param boolean $isFile
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    86
     * @param boolean $storeContent
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    87
     * @param string  $defaultEncoding   HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    88
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    89
    private function __construct($data, $isFile, $storeContent, $defaultEncoding = '')
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    90
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    91
        $this->_doc = new DOMDocument();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    92
        $this->_doc->substituteEntities = true;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    93
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    94
        if ($isFile) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    95
            $htmlData = file_get_contents($data);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    96
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    97
            $htmlData = $data;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    98
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    99
        @$this->_doc->loadHTML($htmlData);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   100
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   101
        if ($this->_doc->encoding === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   102
            // Document encoding is not recognized
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   103
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   104
            /** @todo improve HTML vs HTML fragment recognition */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   105
            if (preg_match('/<html[^>]*>/i', $htmlData, $matches, PREG_OFFSET_CAPTURE)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   106
                // It's an HTML document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   107
                // Add additional HEAD section and recognize document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   108
                $htmlTagOffset = $matches[0][1] + strlen($matches[0][0]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   109
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   110
                @$this->_doc->loadHTML(iconv($defaultEncoding, 'UTF-8//IGNORE', substr($htmlData, 0, $htmlTagOffset))
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   111
                                     . '<head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head>'
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   112
                                     . iconv($defaultEncoding, 'UTF-8//IGNORE', substr($htmlData, $htmlTagOffset)));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   113
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   114
                // Remove additional HEAD section
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   115
                $xpath = new DOMXPath($this->_doc);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   116
                $head  = $xpath->query('/html/head')->item(0);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   117
                $head->parentNode->removeChild($head);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   118
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   119
                // It's an HTML fragment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   120
                @$this->_doc->loadHTML('<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   121
                                     . iconv($defaultEncoding, 'UTF-8//IGNORE', $htmlData)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   122
                                     . '</body></html>');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   123
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   124
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   125
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   126
        /** @todo Add correction of wrong HTML encoding recognition processing
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   127
         * The case is:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   128
         * Content-type HTTP-EQUIV meta tag is presented, but ISO-8859-5 encoding is actually used,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   129
         * even $this->_doc->encoding demonstrates another recognized encoding
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   130
         */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   131
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   132
        $xpath = new DOMXPath($this->_doc);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   133
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   134
        $docTitle = '';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   135
        $titleNodes = $xpath->query('/html/head/title');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   136
        foreach ($titleNodes as $titleNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   137
            // title should always have only one entry, but we process all nodeset entries
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   138
            $docTitle .= $titleNode->nodeValue . ' ';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   139
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   140
        $this->addField(Zend_Search_Lucene_Field::Text('title', $docTitle, 'UTF-8'));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   141
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   142
        $metaNodes = $xpath->query('/html/head/meta[@name]');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   143
        foreach ($metaNodes as $metaNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   144
            $this->addField(Zend_Search_Lucene_Field::Text($metaNode->getAttribute('name'),
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   145
                                                           $metaNode->getAttribute('content'),
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   146
                                                           'UTF-8'));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   147
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   148
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   149
        $docBody = '';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   150
        $bodyNodes = $xpath->query('/html/body');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   151
        foreach ($bodyNodes as $bodyNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   152
            // body should always have only one entry, but we process all nodeset entries
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   153
            $this->_retrieveNodeText($bodyNode, $docBody);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   154
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   155
        if ($storeContent) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   156
            $this->addField(Zend_Search_Lucene_Field::Text('body', $docBody, 'UTF-8'));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   157
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   158
            $this->addField(Zend_Search_Lucene_Field::UnStored('body', $docBody, 'UTF-8'));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   159
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   160
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   161
        $linkNodes = $this->_doc->getElementsByTagName('a');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   162
        foreach ($linkNodes as $linkNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   163
            if (($href = $linkNode->getAttribute('href')) != '' &&
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   164
                (!self::$_excludeNoFollowLinks  ||  strtolower($linkNode->getAttribute('rel')) != 'nofollow' )
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   165
               ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   166
                $this->_links[] = $href;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   167
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   168
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   169
        $linkNodes = $this->_doc->getElementsByTagName('area');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   170
        foreach ($linkNodes as $linkNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   171
            if (($href = $linkNode->getAttribute('href')) != '' &&
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   172
                (!self::$_excludeNoFollowLinks  ||  strtolower($linkNode->getAttribute('rel')) != 'nofollow' )
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   173
               ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   174
                $this->_links[] = $href;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   175
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   176
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   177
        $this->_links = array_unique($this->_links);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   178
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   179
        $linkNodes = $xpath->query('/html/head/link');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   180
        foreach ($linkNodes as $linkNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   181
            if (($href = $linkNode->getAttribute('href')) != '') {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   182
                $this->_headerLinks[] = $href;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   183
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   184
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   185
        $this->_headerLinks = array_unique($this->_headerLinks);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   186
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   187
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   188
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   189
     * Set exclude nofollow links flag
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   190
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   191
     * @param boolean $newValue
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   192
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   193
    public static function setExcludeNoFollowLinks($newValue)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   194
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   195
        self::$_excludeNoFollowLinks = $newValue;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   196
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   197
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   198
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   199
     * Get exclude nofollow links flag
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   200
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   201
     * @return boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   202
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   203
    public static function getExcludeNoFollowLinks()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   204
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   205
        return self::$_excludeNoFollowLinks;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   206
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   207
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   208
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   209
     * Get node text
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   210
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   211
     * We should exclude scripts, which may be not included into comment tags, CDATA sections,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   212
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   213
     * @param DOMNode $node
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   214
     * @param string &$text
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   215
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   216
    private function _retrieveNodeText(DOMNode $node, &$text)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   217
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   218
        if ($node->nodeType == XML_TEXT_NODE) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   219
            $text .= $node->nodeValue;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   220
            if(!in_array($node->parentNode->tagName, $this->_inlineTags)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   221
                $text .= ' ';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   222
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   223
        } else if ($node->nodeType == XML_ELEMENT_NODE  &&  $node->nodeName != 'script') {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   224
            foreach ($node->childNodes as $childNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   225
                $this->_retrieveNodeText($childNode, $text);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   226
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   227
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   228
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   229
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   230
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   231
     * Get document HREF links
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   232
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   233
     * @return array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   234
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   235
    public function getLinks()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   236
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   237
        return $this->_links;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   238
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   239
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   240
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   241
     * Get document header links
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   242
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   243
     * @return array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   244
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   245
    public function getHeaderLinks()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   246
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   247
        return $this->_headerLinks;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   248
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   249
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   250
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   251
     * Load HTML document from a string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   252
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   253
     * @param string  $data
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   254
     * @param boolean $storeContent
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   255
     * @param string  $defaultEncoding   HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   256
     * @return Zend_Search_Lucene_Document_Html
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   257
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   258
    public static function loadHTML($data, $storeContent = false, $defaultEncoding = '')
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   259
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   260
        return new Zend_Search_Lucene_Document_Html($data, false, $storeContent, $defaultEncoding);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   261
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   262
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   263
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   264
     * Load HTML document from a file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   265
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   266
     * @param string  $file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   267
     * @param boolean $storeContent
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   268
     * @param string  $defaultEncoding   HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   269
     * @return Zend_Search_Lucene_Document_Html
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   270
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   271
    public static function loadHTMLFile($file, $storeContent = false, $defaultEncoding = '')
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   272
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   273
        return new Zend_Search_Lucene_Document_Html($file, true, $storeContent, $defaultEncoding);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   274
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   275
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   276
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   277
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   278
     * Highlight text in text node
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   279
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   280
     * @param DOMText $node
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   281
     * @param array   $wordsToHighlight
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   282
     * @param callback $callback   Callback method, used to transform (highlighting) text.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   283
     * @param array    $params     Array of additionall callback parameters (first non-optional parameter is a text to transform)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   284
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   285
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   286
    protected function _highlightTextNode(DOMText $node, $wordsToHighlight, $callback, $params)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   287
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   288
        /** Zend_Search_Lucene_Analysis_Analyzer */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   289
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   290
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   291
        $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   292
        $analyzer->setInput($node->nodeValue, 'UTF-8');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   293
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   294
        $matchedTokens = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   295
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   296
        while (($token = $analyzer->nextToken()) !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   297
            if (isset($wordsToHighlight[$token->getTermText()])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   298
                $matchedTokens[] = $token;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   299
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   300
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   301
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   302
        if (count($matchedTokens) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   303
            return;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   304
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   305
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   306
        $matchedTokens = array_reverse($matchedTokens);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   307
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   308
        foreach ($matchedTokens as $token) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   309
            // Cut text after matched token
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   310
            $node->splitText($token->getEndOffset());
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   311
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   312
            // Cut matched node
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   313
            $matchedWordNode = $node->splitText($token->getStartOffset());
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   314
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   315
            // Retrieve HTML string representation for highlihted word
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   316
            $fullCallbackparamsList = $params;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   317
            array_unshift($fullCallbackparamsList, $matchedWordNode->nodeValue);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   318
            $highlightedWordNodeSetHtml = call_user_func_array($callback, $fullCallbackparamsList);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   319
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   320
            // Transform HTML string to a DOM representation and automatically transform retrieved string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   321
            // into valid XHTML (It's automatically done by loadHTML() method)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   322
            $highlightedWordNodeSetDomDocument = new DOMDocument('1.0', 'UTF-8');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   323
            $success = @$highlightedWordNodeSetDomDocument->
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   324
                                loadHTML('<html><head><meta http-equiv="Content-type" content="text/html; charset=UTF-8"/></head><body>'
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   325
                                       . $highlightedWordNodeSetHtml
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   326
                                       . '</body></html>');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   327
            if (!$success) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   328
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   329
                throw new Zend_Search_Lucene_Exception("Error occured while loading highlighted text fragment: '$highlightedWordNodeSetHtml'.");
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   330
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   331
            $highlightedWordNodeSetXpath = new DOMXPath($highlightedWordNodeSetDomDocument);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   332
            $highlightedWordNodeSet      = $highlightedWordNodeSetXpath->query('/html/body')->item(0)->childNodes;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   333
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   334
            for ($count = 0; $count < $highlightedWordNodeSet->length; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   335
                $nodeToImport = $highlightedWordNodeSet->item($count);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   336
                $node->parentNode->insertBefore($this->_doc->importNode($nodeToImport, true /* deep copy */),
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   337
                                                $matchedWordNode);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   338
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   339
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   340
            $node->parentNode->removeChild($matchedWordNode);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   341
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   342
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   343
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   344
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   345
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   346
     * highlight words in content of the specified node
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   347
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   348
     * @param DOMNode $contextNode
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   349
     * @param array $wordsToHighlight
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   350
     * @param callback $callback   Callback method, used to transform (highlighting) text.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   351
     * @param array    $params     Array of additionall callback parameters (first non-optional parameter is a text to transform)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   352
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   353
    protected function _highlightNodeRecursive(DOMNode $contextNode, $wordsToHighlight, $callback, $params)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   354
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   355
        $textNodes = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   356
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   357
        if (!$contextNode->hasChildNodes()) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   358
            return;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   359
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   360
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   361
        foreach ($contextNode->childNodes as $childNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   362
            if ($childNode->nodeType == XML_TEXT_NODE) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   363
                // process node later to leave childNodes structure untouched
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   364
                $textNodes[] = $childNode;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   365
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   366
                // Process node if it's not a script node
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   367
                if ($childNode->nodeName != 'script') {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   368
                    $this->_highlightNodeRecursive($childNode, $wordsToHighlight, $callback, $params);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   369
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   370
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   371
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   372
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   373
        foreach ($textNodes as $textNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   374
            $this->_highlightTextNode($textNode, $wordsToHighlight, $callback, $params);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   375
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   376
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   377
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   378
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   379
     * Standard callback method used to highlight words.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   380
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   381
     * @param  string  $stringToHighlight
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   382
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   383
     * @internal
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   384
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   385
    public function applyColour($stringToHighlight, $colour)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   386
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   387
        return '<b style="color:black;background-color:' . $colour . '">' . $stringToHighlight . '</b>';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   388
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   389
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   390
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   391
     * Highlight text with specified color
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   392
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   393
     * @param string|array $words
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   394
     * @param string $colour
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   395
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   396
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   397
    public function highlight($words, $colour = '#66ffff')
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   398
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   399
        return $this->highlightExtended($words, array($this, 'applyColour'), array($colour));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   400
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   401
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   402
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   403
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   404
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   405
     * Highlight text using specified View helper or callback function.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   406
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   407
     * @param string|array $words  Words to highlight. Words could be organized using the array or string.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   408
     * @param callback $callback   Callback method, used to transform (highlighting) text.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   409
     * @param array    $params     Array of additionall callback parameters passed through into it
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   410
     *                             (first non-optional parameter is an HTML fragment for highlighting)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   411
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   412
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   413
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   414
    public function highlightExtended($words, $callback, $params = array())
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   415
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   416
        /** Zend_Search_Lucene_Analysis_Analyzer */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   417
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   418
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   419
        if (!is_array($words)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   420
            $words = array($words);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   421
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   422
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   423
        $wordsToHighlightList = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   424
        $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   425
        foreach ($words as $wordString) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   426
            $wordsToHighlightList[] = $analyzer->tokenize($wordString);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   427
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   428
        $wordsToHighlight = call_user_func_array('array_merge', $wordsToHighlightList);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   429
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   430
        if (count($wordsToHighlight) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   431
            return $this->_doc->saveHTML();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   432
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   433
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   434
        $wordsToHighlightFlipped = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   435
        foreach ($wordsToHighlight as $id => $token) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   436
            $wordsToHighlightFlipped[$token->getTermText()] = $id;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   437
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   438
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   439
        if (!is_callable($callback)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   440
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   441
            throw new Zend_Search_Lucene_Exception('$viewHelper parameter mast be a View Helper name, View Helper object or callback.');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   442
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   443
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   444
        $xpath = new DOMXPath($this->_doc);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   445
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   446
        $matchedNodes = $xpath->query("/html/body");
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   447
        foreach ($matchedNodes as $matchedNode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   448
            $this->_highlightNodeRecursive($matchedNode, $wordsToHighlightFlipped, $callback, $params);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   449
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   450
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   451
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   452
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   453
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   454
     * Get HTML
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   455
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   456
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   457
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   458
    public function getHTML()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   459
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   460
        return $this->_doc->saveHTML();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   461
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   462
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   463
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   464
     * Get HTML body
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   465
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   466
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   467
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   468
    public function getHtmlBody()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   469
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   470
        $xpath = new DOMXPath($this->_doc);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   471
        $bodyNodes = $xpath->query('/html/body')->item(0)->childNodes;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   472
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   473
        $outputFragments = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   474
        for ($count = 0; $count < $bodyNodes->length; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   475
            $outputFragments[] = $this->_doc->saveXML($bodyNodes->item($count));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   476
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   477
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   478
        return implode($outputFragments);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   479
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   480
}
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   481