1: <?php
2: /**
3: * SimplePie
4: *
5: * A PHP-Based RSS and Atom Feed Framework.
6: * Takes the hard work out of managing a complete RSS/Atom solution.
7: *
8: * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
9: * All rights reserved.
10: *
11: * Redistribution and use in source and binary forms, with or without modification, are
12: * permitted provided that the following conditions are met:
13: *
14: * * Redistributions of source code must retain the above copyright notice, this list of
15: * conditions and the following disclaimer.
16: *
17: * * Redistributions in binary form must reproduce the above copyright notice, this list
18: * of conditions and the following disclaimer in the documentation and/or other materials
19: * provided with the distribution.
20: *
21: * * Neither the name of the SimplePie Team nor the names of its contributors may be used
22: * to endorse or promote products derived from this software without specific prior
23: * written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26: * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27: * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28: * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33: * POSSIBILITY OF SUCH DAMAGE.
34: *
35: * @package SimplePie
36: * @version 1.3
37: * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
38: * @author Ryan Parman
39: * @author Geoffrey Sneddon
40: * @author Ryan McCue
41: * @link http://simplepie.org/ SimplePie
42: * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43: */
44:
45: /**
46: * IRI parser/serialiser/normaliser
47: *
48: * @package SimplePie
49: * @subpackage HTTP
50: * @author Geoffrey Sneddon
51: * @author Steve Minutillo
52: * @author Ryan McCue
53: * @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue
54: * @license http://www.opensource.org/licenses/bsd-license.php
55: */
56: class SimplePie_IRI
57: {
/**
 * Scheme component; null when the IRI has no scheme.
 *
 * @var string|null
 */
protected $scheme = null;

/**
 * User information component; null when absent.
 *
 * @var string|null
 */
protected $iuserinfo = null;

/**
 * Host component; null when absent.
 *
 * @var string|null
 */
protected $ihost = null;

/**
 * Port component; set_port() stores it as an integer, null when absent.
 *
 * @var int|null
 */
protected $port = null;

/**
 * Path component; the empty string when the IRI has no path.
 *
 * @var string
 */
protected $ipath = '';

/**
 * Query component; null when absent.
 *
 * @var string|null
 */
protected $iquery = null;

/**
 * Fragment component; null when absent.
 *
 * @var string|null
 */
protected $ifragment = null;

/**
 * Per-scheme default values.
 *
 * Keyed by scheme; each entry maps a component property name to the value
 * that is considered the default for that scheme. scheme_normalization()
 * clears a component that equals its default, and __get() falls back to
 * the default when the stored component is null.
 *
 * @var array
 */
protected $normalization = array(
    'acap' => array(
        'port' => 674
    ),
    'dict' => array(
        'port' => 2628
    ),
    'file' => array(
        'ihost' => 'localhost'
    ),
    'http' => array(
        'port' => 80,
        'ipath' => '/'
    ),
    'https' => array(
        'port' => 443,
        'ipath' => '/'
    ),
);
132:
/**
 * Serialise the IRI when the object is used in a string context.
 *
 * get_iri() returns false for an invalid IRI, but __toString() is required
 * to return a string, so the result is cast: an invalid IRI yields ''.
 *
 * @return string
 */
public function __toString()
{
    return (string) $this->get_iri();
}
142:
/**
 * Route writes to inaccessible properties through the matching setter.
 *
 * A set_<name>() method is used when one exists. Otherwise the "i"-prefixed
 * component names (iauthority, iuserinfo, ihost, ipath, iquery, ifragment)
 * are mapped to the setter named after the unprefixed component. Any other
 * name is silently ignored.
 *
 * @param string $name Property name
 * @param mixed $value Property value
 */
public function __set($name, $value)
{
    $setter = 'set_' . $name;
    if (method_exists($this, $setter))
    {
        $this->$setter($value);
        return;
    }

    $prefixed = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
    if (in_array($name, $prefixed, true))
    {
        $setter = 'set_' . substr($name, 1);
        $this->$setter($value);
    }
}
167:
/**
 * Provide read access to the IRI components as properties.
 *
 * "iri", "uri", "iauthority" and "authority" are computed through their
 * get_*() methods. Other names are looked up as a property, then with an
 * "i" prefix added (host -> ihost), then with the first character removed
 * (ischeme -> scheme). An unknown name raises an E_USER_NOTICE. When the
 * resolved value is null, the scheme's default from the normalization
 * table is returned instead, if there is one.
 *
 * @param string $name Property name
 * @return mixed
 */
public function __get($name)
{
    // array_key_exists() on the property list is used rather than isset()
    // because isset() reports false for properties that hold null.
    $properties = get_object_vars($this);
    $computed = array('iri', 'uri', 'iauthority', 'authority');

    if (in_array($name, $computed, true))
    {
        $getter = 'get_' . $name;
        $value = $this->$getter();
    }
    elseif (array_key_exists($name, $properties))
    {
        $value = $this->$name;
    }
    // host -> ihost
    elseif (($alias = 'i' . $name) && array_key_exists($alias, $properties))
    {
        $name = $alias;
        $value = $this->$alias;
    }
    // ischeme -> scheme
    elseif (($alias = substr($name, 1)) && array_key_exists($alias, $properties))
    {
        $name = $alias;
        $value = $this->$alias;
    }
    else
    {
        trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
        $value = null;
    }

    if ($value === null && isset($this->normalization[$this->scheme][$name]))
    {
        return $this->normalization[$this->scheme][$name];
    }

    return $value;
}
220:
/**
 * Report whether a property can be read through the overloaded accessors.
 *
 * True when a get_<name>() method exists or when the property of that name
 * is set to a non-null value.
 *
 * @param string $name Property name
 * @return bool
 */
public function __isset($name)
{
    return method_exists($this, 'get_' . $name) || isset($this->$name);
}
238:
/**
 * Handle unset() on an overloaded property.
 *
 * When a set_<name>() method exists it is called with the empty string;
 * other names are ignored.
 *
 * @param string $name Property name
 */
public function __unset($name)
{
    $setter = 'set_' . $name;
    if (method_exists($this, $setter))
    {
        $this->$setter('');
    }
}
251:
/**
 * Build an IRI object, optionally initialised from a string.
 *
 * The argument is handed to set_iri(); passing null (the default) leaves
 * every component at its initial value.
 *
 * @param string|null $iri IRI to parse
 */
public function __construct($iri = null)
{
    $this->set_iri($iri);
}
261:
/**
 * Resolve a (possibly relative) IRI against a base IRI.
 *
 * Strings are converted to SimplePie_IRI objects first. Returns false when
 * the relative IRI is invalid, or when a base is needed and it is invalid
 * or has no scheme. A relative IRI that already has a scheme is returned
 * as a clone without consulting the base.
 *
 * @param SimplePie_IRI|string $base (Absolute) Base IRI
 * @param SimplePie_IRI|string $relative Relative IRI
 * @return SimplePie_IRI|false
 */
public static function absolutize($base, $relative)
{
    if (!($relative instanceof SimplePie_IRI))
    {
        $relative = new SimplePie_IRI($relative);
    }
    if (!$relative->is_valid())
    {
        return false;
    }
    // Already absolute: the base is irrelevant.
    if ($relative->scheme !== null)
    {
        return clone $relative;
    }

    if (!($base instanceof SimplePie_IRI))
    {
        $base = new SimplePie_IRI($base);
    }
    if ($base->scheme === null || !$base->is_valid())
    {
        return false;
    }

    if ($relative->get_iri() === '')
    {
        // Empty reference: the base itself, minus its fragment.
        $target = clone $base;
        $target->ifragment = null;
    }
    elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
    {
        // Network-path reference: only the scheme comes from the base.
        $target = clone $relative;
        $target->scheme = $base->scheme;
    }
    else
    {
        $target = new SimplePie_IRI;
        $target->scheme = $base->scheme;
        $target->iuserinfo = $base->iuserinfo;
        $target->ihost = $base->ihost;
        $target->port = $base->port;

        $relative_path = $relative->ipath;
        if ($relative_path === '')
        {
            // No path in the reference: keep the base path, and the base
            // query unless the reference supplies its own.
            $target->ipath = $base->ipath;
            $target->iquery = ($relative->iquery !== null) ? $relative->iquery : $base->iquery;
        }
        else
        {
            $base_has_authority = $base->iuserinfo !== null || $base->ihost !== null || $base->port !== null;

            if ($relative_path[0] === '/')
            {
                $merged = $relative_path;
            }
            elseif ($base_has_authority && $base->ipath === '')
            {
                $merged = '/' . $relative_path;
            }
            elseif (($last_slash = strrpos($base->ipath, '/')) !== false)
            {
                // Replace everything after the last "/" of the base path.
                $merged = substr($base->ipath, 0, $last_slash + 1) . $relative_path;
            }
            else
            {
                $merged = $relative_path;
            }

            $target->ipath = $target->remove_dot_segments($merged);
            $target->iquery = $relative->iquery;
        }
        $target->ifragment = $relative->ifragment;
    }

    $target->scheme_normalization();
    return $target;
}
357:
/**
 * Split an IRI into scheme/authority/path/query/fragment segments.
 *
 * Leading and trailing space, tab, LF, FF and CR are trimmed first. The
 * returned array is the preg_match() result (numbered and named groups)
 * with the named entries normalised: "scheme", "authority", "query" and
 * "fragment" are null when the component is absent, "path" is always a
 * string.
 *
 * The pattern uses the "s" modifier so that "." in the fragment group also
 * matches line breaks, as the negated character classes used for the path
 * and query already do. Without it an IRI with a line break inside its
 * fragment failed to match and ended the script with a fatal error; with
 * it every string matches and the line break is left for set_fragment()
 * to percent-encode.
 *
 * @param string $iri
 * @return array
 */
protected function parse_iri($iri)
{
    $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
    if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/s', $iri, $match))
    {
        // Group 1 is "scheme:"; an unmatched leading group is reported as ''.
        if ($match[1] === '')
        {
            $match['scheme'] = null;
        }
        // Group 3 is "//authority"; trailing unmatched groups are not set.
        if (!isset($match[3]) || $match[3] === '')
        {
            $match['authority'] = null;
        }
        if (!isset($match[5]))
        {
            $match['path'] = '';
        }
        // Group 6 is "?query".
        if (!isset($match[6]) || $match[6] === '')
        {
            $match['query'] = null;
        }
        // Group 8 is "#fragment".
        if (!isset($match[8]) || $match[8] === '')
        {
            $match['fragment'] = null;
        }
        return $match;
    }
    else
    {
        // Only reachable if preg_match() itself fails.
        trigger_error('This should never happen', E_USER_ERROR);
        die;
    }
}
397:
/**
 * Collapse "." and ".." segments in a path.
 *
 * Works through the input from the left, moving finished segments to an
 * output buffer, and stops as soon as the remaining input contains no
 * more dot segments; whatever is left is appended unchanged.
 *
 * @param string $input
 * @return string
 */
protected function remove_dot_segments($input)
{
    $output = '';
    while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
    {
        // A: drop a leading "../" or "./".
        if (strncmp($input, '../', 3) === 0)
        {
            $input = substr($input, 3);
        }
        elseif (strncmp($input, './', 2) === 0)
        {
            $input = substr($input, 2);
        }
        // B: a leading "/./" or a bare "/." becomes "/".
        elseif (strncmp($input, '/./', 3) === 0)
        {
            $input = substr($input, 2);
        }
        elseif ($input === '/.')
        {
            $input = '/';
        }
        // C: a leading "/../" or a bare "/.." becomes "/" and the last
        // segment already in the output (with its "/") is removed.
        elseif (strncmp($input, '/../', 4) === 0)
        {
            $input = substr($input, 3);
            $cut = strrpos($output, '/');
            $output = $cut ? substr($output, 0, $cut) : '';
        }
        elseif ($input === '/..')
        {
            $input = '/';
            $cut = strrpos($output, '/');
            $output = $cut ? substr($output, 0, $cut) : '';
        }
        // D: nothing but "." or ".." is left.
        elseif ($input === '.' || $input === '..')
        {
            $input = '';
        }
        // E: move the first segment (including a leading "/", up to but not
        // including the next "/") to the output.
        else
        {
            $next_slash = strpos($input, '/', 1);
            if ($next_slash !== false)
            {
                $output .= substr($input, 0, $next_slash);
                $input = substr($input, $next_slash);
            }
            else
            {
                $output .= $input;
                $input = '';
            }
        }
    }
    return $output . $input;
}
457:
/**
 * Percent-encode every byte that is not allowed in a component.
 *
 * Existing percent-encoded runs are first normalised through
 * remove_iunreserved_percent_encoded(), and any "%" that is not followed
 * by two hex digits is turned into "%25". After that the string is
 * scanned: ASCII unreserved characters, "%" and $extra_chars pass through,
 * well-formed UTF-8 sequences in the ucschar ranges (or iprivate, when
 * enabled) pass through, and everything else is replaced byte by byte
 * with its %XX form.
 *
 * @param string $string Input string
 * @param string $extra_chars Valid characters not in iunreserved or
 *                            iprivate (this is ASCII-only)
 * @param bool $iprivate Allow iprivate
 * @return string
 */
protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
{
    // Normalise as many pct-encoded sections as possible.
    $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

    // Escape "%" characters that do not start a valid escape.
    $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

    // "%" can be allowed here because every escape is valid by now.
    $allowed = $extra_chars . 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

    $cursor = 0;
    $total = strlen($string);
    while (($cursor += strspn($string, $allowed, $cursor)) < $total)
    {
        $byte = ord($string[$cursor]);

        // Index of the first byte of the sequence being examined.
        $first = $cursor;
        $well_formed = true;

        // Classify the lead byte. Allowed single bytes were skipped by
        // strspn() above, so any other single byte is invalid here.
        if (($byte & 0xE0) === 0xC0)
        {
            $codepoint = ($byte & 0x1F) << 6;
            $size = 2;
        }
        elseif (($byte & 0xF0) === 0xE0)
        {
            $codepoint = ($byte & 0x0F) << 12;
            $size = 3;
        }
        elseif (($byte & 0xF8) === 0xF0)
        {
            $codepoint = ($byte & 0x07) << 18;
            $size = 4;
        }
        else
        {
            $well_formed = false;
            $size = 1;
        }
        $pending = $size - 1;

        if ($pending)
        {
            if ($cursor + $size <= $total)
            {
                for ($cursor++; $pending; $cursor++)
                {
                    $byte = ord($string[$cursor]);

                    if (($byte & 0xC0) !== 0x80)
                    {
                        // Not a continuation byte: the sequence is broken
                        // and this byte is looked at again afterwards.
                        $well_formed = false;
                        $cursor--;
                        break;
                    }
                    $codepoint |= ($byte & 0x3F) << (--$pending * 6);
                }
            }
            else
            {
                // Sequence is cut off by the end of the string.
                $cursor = $total - 1;
                $well_formed = false;
            }
        }

        // Decide whether the bytes from $first onwards must be escaped.
        $escape = !$well_formed;
        if (!$escape)
        {
            // Non-shortest-form sequences are invalid.
            $overlong = ($size > 1 && $codepoint <= 0x7F)
                || ($size > 2 && $codepoint <= 0x7FF)
                || ($size > 3 && $codepoint <= 0xFFFF);
            $noncharacter = ($codepoint & 0xFFFE) === 0xFFFE
                || ($codepoint >= 0xFDD0 && $codepoint <= 0xFDEF);
            $outside_ucschar = ($codepoint > 0xD7FF && $codepoint < 0xF900)
                || $codepoint < 0xA0
                || $codepoint > 0xEFFFD;
            $outside_iprivate = !$iprivate
                || $codepoint < 0xE000
                || $codepoint > 0x10FFFD;

            $escape = $overlong || $noncharacter || ($outside_ucschar && $outside_iprivate);
        }

        if ($escape)
        {
            // A complete character left the cursor one past its last byte.
            if ($well_formed)
            {
                $cursor--;
            }

            for ($k = $first; $k <= $cursor; $k++)
            {
                $string = substr_replace($string, sprintf('%%%02X', ord($string[$k])), $k, 1);
                // One byte became three.
                $k += 2;
                $cursor += 2;
                $total += 2;
            }
        }
    }

    return $string;
}
593:
/**
 * Callback function for preg_replace_callback.
 *
 * Receives a run of percent-encoded bytes and decodes those that form
 * UTF-8 characters in iunreserved (ASCII letters, digits, "-", ".", "_",
 * "~" and the ucschar ranges). Everything else stays percent-encoded,
 * with its hex digits upper-cased.
 *
 * The underscore (0x5F) is explicitly treated as unreserved; the range
 * check between "Z" and "a" would otherwise keep "%5F" encoded.
 *
 * @param array $match PCRE match
 * @return string Replacement
 */
protected function remove_iunreserved_percent_encoded($match)
{
    // The match consists only of valid escapes, so exploding on "%" gives
    // an empty first element followed by one two-digit hex string per byte.
    $bytes = explode('%', $match[0]);

    $string = '';
    // Continuation bytes still expected for the current sequence.
    $remaining = 0;
    $start = 1;
    $valid = true;
    $length = 1;
    $character = 0;

    for ($i = 1, $len = count($bytes); $i < $len; $i++)
    {
        $value = hexdec($bytes[$i]);

        if (!$remaining)
        {
            // First byte of a sequence.
            $start = $i;
            $valid = true;

            if ($value <= 0x7F)
            {
                $character = $value;
                $length = 1;
            }
            elseif (($value & 0xE0) === 0xC0)
            {
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            elseif (($value & 0xF0) === 0xE0)
            {
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            elseif (($value & 0xF8) === 0xF0)
            {
                $character = ($value & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            else
            {
                // Not a legal lead byte.
                $valid = false;
                $remaining = 0;
            }
        }
        elseif (($value & 0xC0) === 0x80)
        {
            // Valid continuation byte.
            $remaining--;
            $character |= ($value & 0x3F) << ($remaining * 6);
        }
        else
        {
            // Broken sequence: flush what was collected as invalid and
            // reprocess this byte as the start of a new sequence.
            $valid = false;
            $remaining = 0;
            $i--;
        }

        // End of the current sequence: emit it decoded or still encoded.
        if (!$remaining)
        {
            if (
                // Invalid sequences
                !$valid
                // Non-shortest form sequences are invalid
                || $length > 1 && $character <= 0x7F
                || $length > 2 && $character <= 0x7FF
                || $length > 3 && $character <= 0xFFFF
                // Outside of range of iunreserved codepoints
                || $character < 0x2D
                || $character > 0xEFFFD
                // Noncharacters
                || ($character & 0xFFFE) === 0xFFFE
                || $character >= 0xFDD0 && $character <= 0xFDEF
                // Everything else not in iunreserved (this is all BMP)
                || $character === 0x2F
                || $character > 0x39 && $character < 0x41
                // "[", "\", "]", "^" and "`" -- but not "_" (0x5F)
                || $character > 0x5A && $character < 0x61 && $character !== 0x5F
                || $character > 0x7A && $character < 0x7E
                || $character > 0x7E && $character < 0xA0
                || $character > 0xD7FF && $character < 0xF900
            )
            {
                for ($j = $start; $j <= $i; $j++)
                {
                    $string .= '%' . strtoupper($bytes[$j]);
                }
            }
            else
            {
                for ($j = $start; $j <= $i; $j++)
                {
                    $string .= chr(hexdec($bytes[$j]));
                }
            }
        }
    }

    // Bytes left over belong to a sequence that was cut short; they are
    // invalid and stay encoded.
    if ($remaining)
    {
        for ($j = $start; $j < $len; $j++)
        {
            $string .= '%' . strtoupper($bytes[$j]);
        }
    }

    return $string;
}
734:
/**
 * Clear components that equal the default for the current scheme.
 *
 * For each of iuserinfo, ihost, port, ipath, iquery and ifragment: when the
 * normalization table has a default for the current scheme and the stored
 * value is identical to it, the component is reset (ipath to the empty
 * string, the others to null).
 */
protected function scheme_normalization()
{
    $scheme = $this->scheme;
    $parts = array('iuserinfo', 'ihost', 'port', 'ipath', 'iquery', 'ifragment');

    foreach ($parts as $part)
    {
        if (isset($this->normalization[$scheme][$part]) && $this->$part === $this->normalization[$scheme][$part])
        {
            $this->$part = ($part === 'ipath') ? '' : null;
        }
    }
}
762:
/**
 * Check if the object represents a valid IRI. This needs to be done on each
 * call as some things change depending on another part of the IRI.
 *
 * The rules applied are those RFC 3986 section 3.3 places on the path:
 *  - with an authority, the path must be empty or begin with "/";
 *  - without an authority, the path must not begin with "//" (it would be
 *    read back as an authority);
 *  - without a scheme or authority, the first path segment must not
 *    contain ":" (it would be read back as a scheme).
 *
 * Previously the "//" rule was applied when an authority was present, so
 * IRIs such as "http://example.com//feed" were rejected.
 *
 * @return bool
 */
public function is_valid()
{
    if ($this->ipath === '')
    {
        return true;
    }

    $isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
    if ($isauthority)
    {
        return $this->ipath[0] === '/';
    }

    if (substr($this->ipath, 0, 2) === '//')
    {
        return false;
    }

    if ($this->scheme === null)
    {
        $colon = strpos($this->ipath, ':');
        if ($colon !== false)
        {
            $slash = strpos($this->ipath, '/');
            // Invalid when the colon sits inside the first segment.
            if ($slash === false || $colon < $slash)
            {
                return false;
            }
        }
    }

    return true;
}
792:
/**
 * Set the entire IRI. Returns true on success, false on failure (if there
 * are any invalid characters).
 *
 * Passing null is a no-op that returns true. Successful results are kept
 * in a static cache shared by all instances and keyed by the IRI string.
 *
 * Failed parses are deliberately not cached: the component setters stop at
 * the first failure, so the remaining components keep whatever the object
 * held before, and caching that snapshot would leak one object's state
 * into every later object that is given the same string.
 *
 * @param string $iri
 * @return bool
 */
public function set_iri($iri)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    if ($iri === null)
    {
        return true;
    }

    // Use the same string for the cache key and for parsing.
    $iri = (string) $iri;

    if (isset($cache[$iri]))
    {
        list($this->scheme,
             $this->iuserinfo,
             $this->ihost,
             $this->port,
             $this->ipath,
             $this->iquery,
             $this->ifragment) = $cache[$iri];
        return true;
    }

    $parsed = $this->parse_iri($iri);

    $return = $this->set_scheme($parsed['scheme'])
        && $this->set_authority($parsed['authority'])
        && $this->set_path($parsed['path'])
        && $this->set_query($parsed['query'])
        && $this->set_fragment($parsed['fragment']);

    if ($return)
    {
        $cache[$iri] = array($this->scheme,
                             $this->iuserinfo,
                             $this->ihost,
                             $this->port,
                             $this->ipath,
                             $this->iquery,
                             $this->ifragment);
    }

    return $return;
}
845:
/**
 * Set the scheme. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A valid scheme is a letter followed by letters, digits, "+", "-" or ".";
 * it is stored lower-cased. Null clears the scheme. The pattern uses the
 * "D" modifier so that "$" only matches at the very end: without it a
 * scheme ending in a line break was accepted and stored.
 *
 * @param string $scheme
 * @return bool
 */
public function set_scheme($scheme)
{
    if ($scheme === null)
    {
        $this->scheme = null;
        return true;
    }

    if (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/D', $scheme))
    {
        $this->scheme = null;
        return false;
    }

    $this->scheme = strtolower($scheme);
    return true;
}
870:
/**
 * Set the authority. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * The authority is split at the last "@" into user information and the
 * rest, and the rest at the first ":" after any "]" into host and port.
 * Null clears all three components.
 *
 * Successful results are kept in a static cache. The key includes the
 * current scheme because the component setters apply scheme normalization:
 * keyed on the authority alone, "localhost" parsed under "file" (where it
 * is the default host and is dropped) would later be replayed for
 * "http://localhost/" and lose its host. Failed results are not cached,
 * since the setters stop at the first failure and leave later components
 * at their previous values.
 *
 * An empty port ("host:") is treated as no port.
 *
 * @param string $authority
 * @return bool
 */
public function set_authority($authority)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    if ($authority === null)
    {
        $this->iuserinfo = null;
        $this->ihost = null;
        $this->port = null;
        return true;
    }

    $authority = (string) $authority;

    // A scheme never contains ":", so the first ":" separates the parts.
    $key = $this->scheme . ':' . $authority;
    if (isset($cache[$key]))
    {
        list($this->iuserinfo,
             $this->ihost,
             $this->port) = $cache[$key];
        return true;
    }

    $remaining = $authority;

    $iuserinfo = null;
    if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
    {
        $iuserinfo = (string) substr($remaining, 0, $iuserinfo_end);
        $remaining = (string) substr($remaining, $iuserinfo_end + 1);
    }

    // Start looking for the port after an IPv6 literal's closing bracket.
    $bracket = strpos($remaining, ']');
    $port_start = strpos($remaining, ':', ($bracket === false) ? 0 : $bracket);

    $port = null;
    if ($port_start !== false)
    {
        $port = substr($remaining, $port_start + 1);
        // substr() gives false before PHP 8 and '' from PHP 8 on.
        if ($port === false || $port === '')
        {
            $port = null;
        }
        $remaining = (string) substr($remaining, 0, $port_start);
    }

    $return = $this->set_userinfo($iuserinfo) &&
              $this->set_host($remaining) &&
              $this->set_port($port);

    if ($return)
    {
        $cache[$key] = array($this->iuserinfo,
                             $this->ihost,
                             $this->port);
    }

    return $return;
}
937:
/**
 * Set the user information component.
 *
 * Null clears it. Any other value has its disallowed bytes percent-encoded
 * and scheme normalization is then applied. Always returns true.
 *
 * @param string $iuserinfo
 * @return bool
 */
public function set_userinfo($iuserinfo)
{
    if ($iuserinfo !== null)
    {
        $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
        $this->scheme_normalization();
        return true;
    }

    $this->iuserinfo = null;
    return true;
}
958:
/**
 * Set the host component. Returns true on success, false on failure (if
 * there are any invalid characters).
 *
 * Null clears the host. A value wrapped in "[" and "]" is checked with
 * SimplePie_Net_IPv6::check_ipv6() and stored in the form returned by
 * SimplePie_Net_IPv6::compress(); if the check fails the host is cleared
 * and false is returned. Any other value is percent-encoded where needed
 * and lower-cased, leaving percent-encoded triplets untouched.
 *
 * @param string $ihost
 * @return bool
 */
public function set_host($ihost)
{
    if ($ihost === null)
    {
        $this->ihost = null;
        return true;
    }

    $bracketed = substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']';
    if ($bracketed)
    {
        $literal = substr($ihost, 1, -1);
        if (!SimplePie_Net_IPv6::check_ipv6($literal))
        {
            $this->ihost = null;
            return false;
        }
        $this->ihost = '[' . SimplePie_Net_IPv6::compress($literal) . ']';
    }
    else
    {
        $encoded = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

        // Lower-casing happens after encoding, because the encoding step
        // can turn escapes back into literal (possibly upper-case) letters.
        // Escapes themselves are skipped so their hex digits stay upper-case.
        $index = 0;
        $size = strlen($encoded);
        while (($index += strcspn($encoded, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $index)) < $size)
        {
            if ($encoded[$index] === '%')
            {
                $index += 3;
                continue;
            }
            $encoded[$index] = strtolower($encoded[$index]);
            $index++;
        }

        $this->ihost = $encoded;
    }

    $this->scheme_normalization();

    return true;
}
1014:
/**
 * Set the port. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * Null or the empty string clears the port; RFC 3986 allows an empty port
 * and says it should be omitted when normalising. (The empty string used
 * to pass the digits-only check and was stored as port 0, which also made
 * unset($iri->port) produce ":0".) A digits-only value is stored as an
 * integer and scheme normalization is applied. Anything else clears the
 * port and returns false.
 *
 * @param string $port
 * @return bool
 */
public function set_port($port)
{
    if ($port === null)
    {
        $this->port = null;
        return true;
    }

    $port = (string) $port;
    if ($port === '')
    {
        $this->port = null;
        return true;
    }

    if (strspn($port, '0123456789') === strlen($port))
    {
        $this->port = (int) $port;
        $this->scheme_normalization();
        return true;
    }

    $this->port = null;
    return false;
}
1041:
/**
 * Set the path component.
 *
 * The value is cast to a string and percent-encoded where needed. When the
 * IRI has a scheme, dot segments are removed as well; without a scheme the
 * encoded path is stored as is. Both forms are kept in a static cache keyed
 * by the input path. Scheme normalization is applied afterwards. Always
 * returns true.
 *
 * @param string $ipath
 * @return bool
 */
public function set_path($ipath)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    $ipath = (string) $ipath;

    if (!isset($cache[$ipath]))
    {
        $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
        $cache[$ipath] = array($encoded, $this->remove_dot_segments($encoded));
    }

    list($with_dots, $without_dots) = $cache[$ipath];
    $this->ipath = ($this->scheme !== null) ? $without_dots : $with_dots;

    $this->scheme_normalization();
    return true;
}
1074:
/**
 * Set the query component.
 *
 * Null clears it. Any other value is percent-encoded where needed (private
 * use characters are allowed here) and scheme normalization is applied.
 * Always returns true.
 *
 * @param string $iquery
 * @return bool
 */
public function set_query($iquery)
{
    if ($iquery !== null)
    {
        $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
        $this->scheme_normalization();
        return true;
    }

    $this->iquery = null;
    return true;
}
1094:
/**
 * Set the fragment component.
 *
 * Null clears it. Any other value is percent-encoded where needed and
 * scheme normalization is applied. Always returns true.
 *
 * @param string $ifragment
 * @return bool
 */
public function set_fragment($ifragment)
{
    if ($ifragment !== null)
    {
        $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
        $this->scheme_normalization();
        return true;
    }

    $this->ifragment = null;
    return true;
}
1114:
/**
 * Convert an IRI to a URI (or parts thereof)
 *
 * Every byte in the range 0x80-0xFF is replaced by its %XX escape; all
 * other bytes are left alone.
 *
 * @param string $string IRI, or a part of one
 * @return string
 */
public function to_uri($string)
{
    static $non_ascii;
    if (!$non_ascii)
    {
        $non_ascii = implode('', range("\x80", "\xFF"));
    }

    $offset = 0;
    $length = strlen($string);
    while (true)
    {
        // Jump to the next non-ASCII byte, if any.
        $offset += strcspn($string, $non_ascii, $offset);
        if ($offset >= $length)
        {
            break;
        }

        $escaped = sprintf('%%%02X', ord($string[$offset]));
        $string = substr_replace($string, $escaped, $offset, 1);

        // One byte was replaced by three.
        $offset += 3;
        $length += 2;
    }

    return $string;
}
1139:
/**
 * Get the complete IRI
 *
 * Returns false when is_valid() rejects the current components. Otherwise
 * the components are joined in order. An empty path is replaced by the
 * scheme's default path when the scheme has one and the authority is
 * present and not empty.
 *
 * @return string|false
 */
public function get_iri()
{
    if (!$this->is_valid())
    {
        return false;
    }

    $iri = ($this->scheme !== null) ? $this->scheme . ':' : '';

    $iauthority = $this->get_iauthority();
    if ($iauthority !== null)
    {
        $iri .= '//' . $iauthority;
    }

    if ($this->ipath !== '')
    {
        $iri .= $this->ipath;
    }
    elseif ($iauthority !== null && $iauthority !== '' && !empty($this->normalization[$this->scheme]['ipath']))
    {
        $iri .= $this->normalization[$this->scheme]['ipath'];
    }

    if ($this->iquery !== null)
    {
        $iri .= '?' . $this->iquery;
    }

    if ($this->ifragment !== null)
    {
        $iri .= '#' . $this->ifragment;
    }

    return $iri;
}
1180:
/**
 * Get the complete URI
 *
 * This is the result of get_iri() passed through to_uri().
 *
 * @return string
 */
public function get_uri()
{
    $iri = $this->get_iri();

    return $this->to_uri($iri);
}
1190:
/**
 * Get the complete iauthority
 *
 * Returns null when user information, host and port are all null.
 * Otherwise the parts that are present are joined as "userinfo@host:port".
 *
 * @return string|null
 */
protected function get_iauthority()
{
    if ($this->iuserinfo === null && $this->ihost === null && $this->port === null)
    {
        return null;
    }

    $iauthority = ($this->iuserinfo !== null) ? $this->iuserinfo . '@' : '';

    if ($this->ihost !== null)
    {
        $iauthority .= $this->ihost;
    }

    if ($this->port !== null)
    {
        $iauthority .= ':' . $this->port;
    }

    return $iauthority;
}
1220:
/**
 * Get the complete authority
 *
 * This is the iauthority converted with to_uri(); when there is no
 * authority the null from get_iauthority() is returned unchanged.
 *
 * @return string|null
 */
protected function get_authority()
{
    $iauthority = $this->get_iauthority();

    return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
}
1234: }
1235: