Blame view
public/vendor/masterminds/html5/src/HTML5.php
8.35 KB
86143e36f Коммит вторник |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 |
<?php

namespace Masterminds;

use Masterminds\HTML5\Parser\DOMTreeBuilder;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;
use Masterminds\HTML5\Serializer\OutputRules;
use Masterminds\HTML5\Serializer\Traverser;

/**
 * This class offers convenience methods for parsing and serializing HTML5.
 * It is roughly designed to mirror the \DOMDocument native class.
 */
class HTML5
{
    /**
     * Global options for the parser and serializer.
     *
     * Per-call options passed to the individual methods are merged on top of
     * these, so a per-call value wins over the default of the same key.
     *
     * @var array
     */
    private $defaultOptions = array(
        // Whether the serializer should aggressively encode all characters as entities.
        'encode_entities' => false,

        // Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
        'disable_html_ns' => false,
    );

    /**
     * Errors reported by the tree builder during the most recent parse.
     *
     * @var array
     */
    protected $errors = array();

    /**
     * @param array $defaultOptions Options merged over the built-in defaults and
     *                              used as the baseline for every later call.
     */
    public function __construct(array $defaultOptions = array())
    {
        $this->defaultOptions = array_merge($this->defaultOptions, $defaultOptions);
    }

    /**
     * Get the current default options.
     *
     * @return array
     */
    public function getOptions()
    {
        return $this->defaultOptions;
    }

    /**
     * Load and parse an HTML file.
     *
     * This will apply the HTML5 parser, which is tolerant of many
     * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
     * 3. Note that in these cases, not all of the old data will be
     * preserved. For example, XHTML's XML declaration will be removed.
     *
     * The rules governing parsing are set out in the HTML 5 spec.
     *
     * @param string|resource $file    The path to the file to parse. If this is a resource, it is
     *                                 assumed to be an open stream whose pointer is set to the first
     *                                 byte of input.
     * @param array           $options Configuration options when parsing the HTML.
     *
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *                      library, and should have been included with your version of PHP.
     */
    public function load($file, array $options = array())
    {
        // An open stream is drained from its current position; anything else is
        // treated as a path. The read result is handed to parse() unchecked, so a
        // failed read (false) reaches the parser as-is.
        $contents = is_resource($file)
            ? stream_get_contents($file)
            : file_get_contents($file);

        return $this->parse($contents, $options);
    }

    /**
     * Parse a HTML Document from a string.
     *
     * Take a string of HTML 5 (or earlier) and parse it into a
     * DOMDocument.
     *
     * @param string $string  A html5 document as a string.
     * @param array  $options Configuration options when parsing the HTML.
     *
     * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
     *                      almost all distributions of PHP.
     */
    public function loadHTML($string, array $options = array())
    {
        return $this->parse($string, $options);
    }

    /**
     * Convenience function to load an HTML file.
     *
     * This is here to provide backwards compatibility with the
     * PHP DOM implementation. It simply calls load().
     *
     * @param string|resource $file    The path to the file to parse. If this is a resource, it is
     *                                 assumed to be an open stream whose pointer is set to the first
     *                                 byte of input.
     * @param array           $options Configuration options when parsing the HTML.
     *
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *                      library, and should have been included with your version of PHP.
     */
    public function loadHTMLFile($file, array $options = array())
    {
        return $this->load($file, $options);
    }

    /**
     * Parse a HTML fragment from a string.
     *
     * @param string $string  The HTML5 fragment as a string.
     * @param array  $options Configuration options when parsing the HTML.
     *
     * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
     *                              almost all distributions of PHP.
     */
    public function loadHTMLFragment($string, array $options = array())
    {
        return $this->parseFragment($string, $options);
    }

    /**
     * Return all errors encountered during the parsing phase.
     *
     * @return array
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Return true if some errors were encountered during the parsing phase.
     *
     * @return bool
     */
    public function hasErrors()
    {
        return count($this->errors) > 0;
    }

    /**
     * Parse an input string into a full document.
     *
     * @param string $input
     * @param array  $options
     *
     * @return \DOMDocument
     */
    public function parse($input, array $options = array())
    {
        return $this->buildTree($input, $options, false)->document();
    }

    /**
     * Parse an input string as a document fragment.
     *
     * Lower-level loading function used by loadHTMLFragment().
     *
     * @param string $input   The input data to parse in the form of a string.
     * @param array  $options An array of options.
     *
     * @return \DOMDocumentFragment
     */
    public function parseFragment($input, array $options = array())
    {
        return $this->buildTree($input, $options, true)->fragment();
    }

    /**
     * Save a DOM into a given file as HTML5.
     *
     * @param mixed           $dom     The DOM to be serialized.
     * @param string|resource $file    The filename to be written or resource to write to. A resource
     *                                 passed in by the caller is left open; a file opened here is closed
     *                                 before returning.
     * @param array           $options Configuration options when serializing the DOM. These include:
     *                                 - encode_entities: Text written to the output is escaped by default and not all
     *                                 entities are encoded. If this is set to true all entities will be encoded.
     *                                 Defaults to false.
     *
     * @throws \RuntimeException If $file is a path that cannot be opened for writing.
     */
    public function save($dom, $file, $options = array())
    {
        $ownsStream = !is_resource($file);
        $stream = $ownsStream ? fopen($file, 'wb') : $file;

        // fopen() reports failure by returning false; stop here instead of handing
        // a non-stream to the serializer and to fclose().
        if (false === $stream) {
            throw new \RuntimeException(sprintf(
                'Unable to open "%s" for writing.',
                is_string($file) ? $file : gettype($file)
            ));
        }

        $options = array_merge($this->defaultOptions, $options);
        $rules = new OutputRules($stream, $options);
        $trav = new Traverser($dom, $stream, $rules, $options);

        // Hold on to a serialization failure so the cleanup below always runs.
        // Written without `finally` so this file needs no newer syntax than it
        // already uses.
        $failure = null;
        try {
            $trav->walk();
        } catch (\Exception $e) {
            $failure = $e;
        }

        /*
         * release the traverser to avoid cyclic references and allow PHP to free memory without waiting for gc_collect_cycles
         */
        $rules->unsetTraverser();

        // Only close what this method opened; caller-supplied streams stay open.
        if ($ownsStream) {
            fclose($stream);
        }

        if (null !== $failure) {
            throw $failure;
        }
    }

    /**
     * Convert a DOM into an HTML5 string.
     *
     * @param mixed $dom     The DOM to be serialized.
     * @param array $options Configuration options when serializing the DOM. These include:
     *                       - encode_entities: Text written to the output is escaped by default and not all
     *                       entities are encoded. If this is set to true all entities will be encoded.
     *                       Defaults to false.
     *
     * @return string A HTML5 document generated from the DOM.
     */
    public function saveHTML($dom, $options = array())
    {
        // Serialize into a temporary stream, then read it back from offset 0.
        $stream = fopen('php://temp', 'wb');
        $this->save($dom, $stream, array_merge($this->defaultOptions, $options));

        $html = stream_get_contents($stream, -1, 0);
        fclose($stream);

        return $html;
    }

    /**
     * Run the tokenizer over the input and return the populated tree builder.
     *
     * Shared by parse() and parseFragment() so both reset and collect errors
     * in exactly the same way.
     *
     * @param string $input      The markup to parse.
     * @param array  $options    Per-call options, merged over the defaults.
     * @param bool   $isFragment Passed to the tree builder: true for a fragment,
     *                           false for a full document.
     *
     * @return DOMTreeBuilder The builder after parsing has finished.
     */
    private function buildTree($input, array $options, $isFragment)
    {
        // Clear first so a parse that throws cannot leave the previous run's errors behind.
        $this->errors = array();

        $options = array_merge($this->defaultOptions, $options);
        $events = new DOMTreeBuilder($isFragment, $options);

        // An empty or missing 'encoding' option falls back to UTF-8.
        $encoding = !empty($options['encoding']) ? $options['encoding'] : 'UTF-8';
        $scanner = new Scanner($input, $encoding);

        // A non-empty 'xmlNamespaces' option selects the XML-conformant tokenizer mode.
        $mode = !empty($options['xmlNamespaces'])
            ? Tokenizer::CONFORMANT_XML
            : Tokenizer::CONFORMANT_HTML;
        $parser = new Tokenizer($scanner, $events, $mode);

        $parser->parse();
        $this->errors = $events->getErrors();

        return $events;
    }
}