Uri.php 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740
  1. <?php
  2. declare(strict_types=1);
  3. namespace GuzzleHttp\Psr7;
  4. use GuzzleHttp\Psr7\Exception\MalformedUriException;
  5. use Psr\Http\Message\UriInterface;
  /**
   * PSR-7 URI implementation.
   *
   * A value object holding the individual components of a URI (scheme, user
   * info, host, port, path, query, fragment). Every `with*()` method returns
   * either the same instance (when nothing would change) or a modified clone;
   * the receiver itself is never altered by those methods.
   *
   * @author Michael Dowling
   * @author Tobias Schultze
   * @author Matthew Weier O'Phinney
   */
  class Uri implements UriInterface, \JsonSerializable
  {
      /**
       * Absolute http and https URIs require a host per RFC 7230 Section 2.7
       * but in generic URIs the host can be empty. So for http(s) URIs
       * we apply this default host when no host is given yet to form a
       * valid URI.
       */
      private const HTTP_DEFAULT_HOST = 'localhost';

      /**
       * Default port per scheme. A port equal to its scheme's default is
       * normalized to null by removeDefaultPort().
       */
      private const DEFAULT_PORTS = [
          'http' => 80,
          'https' => 443,
          'ftp' => 21,
          'gopher' => 70,
          'nntp' => 119,
          'news' => 119,
          'telnet' => 23,
          'tn3270' => 23,
          'imap' => 143,
          'pop' => 110,
          'ldap' => 389,
      ];

      /**
       * Unreserved characters for use in a regex.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-2.3
       */
      private const CHAR_UNRESERVED = 'a-zA-Z0-9_\-\.~';

      /**
       * Sub-delims for use in a regex.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-2.2
       */
      private const CHAR_SUB_DELIMS = '!\$&\'\(\)\*\+,;=';

      /**
       * Percent-encoded replacements for the query separators, applied to
       * keys and values by generateQueryString().
       */
      private const QUERY_SEPARATORS_REPLACEMENT = ['=' => '%3D', '&' => '%26'];

      /** @var string Uri scheme. */
      private $scheme = '';

      /** @var string Uri user info. */
      private $userInfo = '';

      /** @var string Uri host. */
      private $host = '';

      /** @var int|null Uri port. */
      private $port;

      /** @var string Uri path. */
      private $path = '';

      /** @var string Uri query string. */
      private $query = '';

      /** @var string Uri fragment. */
      private $fragment = '';

      /** @var string|null Cached string representation; reset to null whenever a component changes. */
      private $composedComponents;

      /**
       * @param string $uri URI to parse; an empty string yields an empty URI.
       *
       * @throws MalformedUriException If the given string cannot be parsed.
       */
      public function __construct(string $uri = '')
      {
          if ($uri !== '') {
              $parts = self::parse($uri);
              if ($parts === false) {
                  throw new MalformedUriException("Unable to parse URI: $uri");
              }
              $this->applyParts($parts);
          }
      }

      /**
       * UTF-8 aware \parse_url() replacement.
       *
       * The internal function produces broken output for non ASCII domain names
       * (IDN) when used with locales other than "C".
       *
       * On the other hand, cURL understands IDN correctly only when UTF-8 locale
       * is configured ("C.UTF-8", "en_US.UTF-8", etc.).
       *
       * Every run of characters other than the delimiters `:/@?&=#` is
       * url-encoded before calling \parse_url(), and each resulting part is
       * url-decoded again afterwards.
       *
       * @see https://bugs.php.net/bug.php?id=52923
       * @see https://www.php.net/manual/en/function.parse-url.php#114817
       * @see https://curl.haxx.se/libcurl/c/CURLOPT_URL.html#ENCODING
       *
       * @return array|false The decoded parse_url() parts (all values are strings,
       *                     including the port), or false when parsing fails.
       */
      private static function parse(string $url)
      {
          // If IPv6: split off "scheme://[literal]" so the brackets of the IP
          // literal are not url-encoded below. Hex digits are matched in both
          // cases; filterHost() lowercases the host afterwards.
          $prefix = '';
          if (preg_match('%^(.*://\[[0-9:a-fA-F]+\])(.*?)$%', $url, $matches)) {
              /** @var array{0:string, 1:string, 2:string} $matches */
              $prefix = $matches[1];
              $url = $matches[2];
          }

          /** @var string */
          $encodedUrl = preg_replace_callback(
              '%[^:/@?&=#]+%usD',
              static function ($matches) {
                  return urlencode($matches[0]);
              },
              $url
          );

          $result = parse_url($prefix . $encodedUrl);

          if ($result === false) {
              return false;
          }

          return array_map('urldecode', $result);
      }

      /**
       * Returns the URI as a string.
       *
       * The composed string is cached in $composedComponents until a component changes.
       */
      public function __toString(): string
      {
          if ($this->composedComponents === null) {
              $this->composedComponents = self::composeComponents(
                  $this->scheme,
                  $this->getAuthority(),
                  $this->path,
                  $this->query,
                  $this->fragment
              );
          }

          return $this->composedComponents;
      }

      /**
       * Composes a URI reference string from its various components.
       *
       * Usually this method does not need to be called manually but instead is used indirectly via
       * `Psr\Http\Message\UriInterface::__toString`.
       *
       * PSR-7 UriInterface treats an empty component the same as a missing component as
       * getQuery(), getFragment() etc. always return a string. This explains the slight
       * difference to RFC 3986 Section 5.3.
       *
       * Another adjustment is that the authority separator is added even when the authority is missing/empty
       * for the "file" scheme. This is because PHP stream functions like `file_get_contents` only work with
       * `file:///myfile` but not with `file:/myfile` although they are equivalent according to RFC 3986. But
       * `file:///` is the more common syntax for the file scheme anyway (Chrome for example redirects to
       * that format).
       *
       * @link https://tools.ietf.org/html/rfc3986#section-5.3
       */
      public static function composeComponents(?string $scheme, ?string $authority, string $path, ?string $query, ?string $fragment): string
      {
          $uri = '';

          // Loose comparisons on purpose: null and '' both mean "component absent".
          if ($scheme != '') {
              $uri .= $scheme . ':';
          }

          if ($authority != '' || $scheme === 'file') {
              $uri .= '//' . $authority;
          }

          // A non-empty path following an authority must start with a slash.
          if ($authority != '' && $path != '' && $path[0] != '/') {
              $path = '/' . $path;
          }

          $uri .= $path;

          if ($query != '') {
              $uri .= '?' . $query;
          }

          if ($fragment != '') {
              $uri .= '#' . $fragment;
          }

          return $uri;
      }

      /**
       * Whether the URI has the default port of the current scheme.
       *
       * `Psr\Http\Message\UriInterface::getPort` may return null or the standard port. This method can be used
       * independently of the implementation.
       */
      public static function isDefaultPort(UriInterface $uri): bool
      {
          return $uri->getPort() === null
              || (
                  isset(self::DEFAULT_PORTS[$uri->getScheme()])
                  && $uri->getPort() === self::DEFAULT_PORTS[$uri->getScheme()]
              );
      }

      /**
       * Whether the URI is absolute, i.e. it has a scheme.
       *
       * An instance of UriInterface can either be an absolute URI or a relative reference. This method returns true
       * if it is the former. An absolute URI has a scheme. A relative reference is used to express a URI relative
       * to another URI, the base URI. Relative references can be divided into several forms:
       * - network-path references, e.g. '//example.com/path'
       * - absolute-path references, e.g. '/path'
       * - relative-path references, e.g. 'subpath'
       *
       * @see Uri::isNetworkPathReference
       * @see Uri::isAbsolutePathReference
       * @see Uri::isRelativePathReference
       * @link https://tools.ietf.org/html/rfc3986#section-4
       */
      public static function isAbsolute(UriInterface $uri): bool
      {
          return $uri->getScheme() !== '';
      }

      /**
       * Whether the URI is a network-path reference.
       *
       * A relative reference that begins with two slash characters is termed a network-path reference.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-4.2
       */
      public static function isNetworkPathReference(UriInterface $uri): bool
      {
          return $uri->getScheme() === '' && $uri->getAuthority() !== '';
      }

      /**
       * Whether the URI is an absolute-path reference.
       *
       * A relative reference that begins with a single slash character is termed an absolute-path reference.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-4.2
       */
      public static function isAbsolutePathReference(UriInterface $uri): bool
      {
          return $uri->getScheme() === ''
              && $uri->getAuthority() === ''
              && isset($uri->getPath()[0])
              && $uri->getPath()[0] === '/';
      }

      /**
       * Whether the URI is a relative-path reference.
       *
       * A relative reference that does not begin with a slash character is termed a relative-path reference.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-4.2
       */
      public static function isRelativePathReference(UriInterface $uri): bool
      {
          return $uri->getScheme() === ''
              && $uri->getAuthority() === ''
              && (!isset($uri->getPath()[0]) || $uri->getPath()[0] !== '/');
      }

      /**
       * Whether the URI is a same-document reference.
       *
       * A same-document reference refers to a URI that is, aside from its fragment
       * component, identical to the base URI. When no base URI is given, only an empty
       * URI reference (apart from its fragment) is considered a same-document reference.
       *
       * @param UriInterface      $uri  The URI to check
       * @param UriInterface|null $base An optional base URI to compare against
       *
       * @link https://tools.ietf.org/html/rfc3986#section-4.4
       */
      public static function isSameDocumentReference(UriInterface $uri, ?UriInterface $base = null): bool
      {
          if ($base !== null) {
              // Resolve against the base first so relative references are compared in absolute form.
              $uri = UriResolver::resolve($base, $uri);

              return ($uri->getScheme() === $base->getScheme())
                  && ($uri->getAuthority() === $base->getAuthority())
                  && ($uri->getPath() === $base->getPath())
                  && ($uri->getQuery() === $base->getQuery());
          }

          return $uri->getScheme() === '' && $uri->getAuthority() === '' && $uri->getPath() === '' && $uri->getQuery() === '';
      }

      /**
       * Creates a new URI with a specific query string value removed.
       *
       * Any existing query string values that exactly match the provided key are
       * removed.
       *
       * @param UriInterface $uri URI to use as a base.
       * @param string       $key Query string key to remove.
       */
      public static function withoutQueryValue(UriInterface $uri, string $key): UriInterface
      {
          $result = self::getFilteredQueryString($uri, [$key]);

          return $uri->withQuery(implode('&', $result));
      }

      /**
       * Creates a new URI with a specific query string value.
       *
       * Any existing query string values that exactly match the provided key are
       * removed and replaced with the given key value pair.
       *
       * A value of null will set the query string key without a value, e.g. "key"
       * instead of "key=value".
       *
       * @param UriInterface $uri   URI to use as a base.
       * @param string       $key   Key to set.
       * @param string|null  $value Value to set
       */
      public static function withQueryValue(UriInterface $uri, string $key, ?string $value): UriInterface
      {
          $result = self::getFilteredQueryString($uri, [$key]);

          $result[] = self::generateQueryString($key, $value);

          return $uri->withQuery(implode('&', $result));
      }

      /**
       * Creates a new URI with multiple specific query string values.
       *
       * It has the same behavior as withQueryValue() but for an associative array of key => value.
       *
       * @param UriInterface               $uri           URI to use as a base.
       * @param array<string, string|null> $keyValueArray Associative array of key and values
       */
      public static function withQueryValues(UriInterface $uri, array $keyValueArray): UriInterface
      {
          $result = self::getFilteredQueryString($uri, array_keys($keyValueArray));

          foreach ($keyValueArray as $key => $value) {
              // PHP turns numeric-string array keys into ints, hence the explicit casts.
              $result[] = self::generateQueryString((string) $key, $value !== null ? (string) $value : null);
          }

          return $uri->withQuery(implode('&', $result));
      }

      /**
       * Creates a URI from a hash of `parse_url` components.
       *
       * @link http://php.net/manual/en/function.parse-url.php
       *
       * @throws MalformedUriException If the components do not form a valid URI.
       */
      public static function fromParts(array $parts): UriInterface
      {
          $uri = new self();
          $uri->applyParts($parts);
          $uri->validateState();

          return $uri;
      }

      /**
       * Returns the scheme, lowercased by filterScheme(), or '' when none is set.
       */
      public function getScheme(): string
      {
          return $this->scheme;
      }

      /**
       * Returns the authority in the form "[user-info@]host[:port]".
       *
       * The port is only appended when it is not null, i.e. when it is set and
       * is not the default port of the scheme.
       */
      public function getAuthority(): string
      {
          $authority = $this->host;
          if ($this->userInfo !== '') {
              $authority = $this->userInfo . '@' . $authority;
          }

          if ($this->port !== null) {
              $authority .= ':' . $this->port;
          }

          return $authority;
      }

      /**
       * Returns the user info ("user" or "user:password"), or '' when none is set.
       */
      public function getUserInfo(): string
      {
          return $this->userInfo;
      }

      /**
       * Returns the host, lowercased by filterHost(), or '' when none is set.
       */
      public function getHost(): string
      {
          return $this->host;
      }

      /**
       * Returns the port, or null when none is set or it is the scheme's default port.
       */
      public function getPort(): ?int
      {
          return $this->port;
      }

      /**
       * Returns the percent-encoded path, or '' when none is set.
       */
      public function getPath(): string
      {
          return $this->path;
      }

      /**
       * Returns the percent-encoded query string (without the leading "?"), or '' when none is set.
       */
      public function getQuery(): string
      {
          return $this->query;
      }

      /**
       * Returns the percent-encoded fragment (without the leading "#"), or '' when none is set.
       */
      public function getFragment(): string
      {
          return $this->fragment;
      }

      /**
       * Returns an instance with the given scheme.
       *
       * @param mixed $scheme
       *
       * @throws \InvalidArgumentException If the scheme is not a string.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withScheme($scheme): UriInterface
      {
          $scheme = $this->filterScheme($scheme);

          if ($this->scheme === $scheme) {
              return $this;
          }

          $new = clone $this;
          $new->scheme = $scheme;
          $new->composedComponents = null;
          // The current port may be the default port of the new scheme.
          $new->removeDefaultPort();
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given user info.
       *
       * @param mixed $user
       * @param mixed $password Optional password; null means "no password part".
       *
       * @throws \InvalidArgumentException If user or password is not a string.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withUserInfo($user, $password = null): UriInterface
      {
          $info = $this->filterUserInfoComponent($user);
          if ($password !== null) {
              $info .= ':' . $this->filterUserInfoComponent($password);
          }

          if ($this->userInfo === $info) {
              return $this;
          }

          $new = clone $this;
          $new->userInfo = $info;
          $new->composedComponents = null;
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given host.
       *
       * @param mixed $host
       *
       * @throws \InvalidArgumentException If the host is not a string.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withHost($host): UriInterface
      {
          $host = $this->filterHost($host);

          if ($this->host === $host) {
              return $this;
          }

          $new = clone $this;
          $new->host = $host;
          $new->composedComponents = null;
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given port.
       *
       * @param mixed $port Port number, or null to remove the port.
       *
       * @throws \InvalidArgumentException If the port is outside 0-65535.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withPort($port): UriInterface
      {
          $port = $this->filterPort($port);

          if ($this->port === $port) {
              return $this;
          }

          $new = clone $this;
          $new->port = $port;
          $new->composedComponents = null;
          $new->removeDefaultPort();
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given path.
       *
       * @param mixed $path
       *
       * @throws \InvalidArgumentException If the path is not a string.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withPath($path): UriInterface
      {
          $path = $this->filterPath($path);

          if ($this->path === $path) {
              return $this;
          }

          $new = clone $this;
          $new->path = $path;
          $new->composedComponents = null;
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given query string.
       *
       * @param mixed $query
       *
       * @throws \InvalidArgumentException If the query is not a string.
       */
      public function withQuery($query): UriInterface
      {
          $query = $this->filterQueryAndFragment($query);

          if ($this->query === $query) {
              return $this;
          }

          $new = clone $this;
          $new->query = $query;
          $new->composedComponents = null;

          return $new;
      }

      /**
       * Returns an instance with the given fragment.
       *
       * @param mixed $fragment
       *
       * @throws \InvalidArgumentException If the fragment is not a string.
       */
      public function withFragment($fragment): UriInterface
      {
          $fragment = $this->filterQueryAndFragment($fragment);

          if ($this->fragment === $fragment) {
              return $this;
          }

          $new = clone $this;
          $new->fragment = $fragment;
          $new->composedComponents = null;

          return $new;
      }

      /**
       * Serializes the URI to JSON as its string representation.
       */
      public function jsonSerialize(): string
      {
          return $this->__toString();
      }

      /**
       * Apply parse_url parts to a URI.
       *
       * Each present part is passed through its filter; absent parts reset the
       * component to its empty value ('' or null for the port).
       *
       * @param array $parts Array of parse_url parts to apply.
       */
      private function applyParts(array $parts): void
      {
          $this->scheme = isset($parts['scheme'])
              ? $this->filterScheme($parts['scheme'])
              : '';
          $this->userInfo = isset($parts['user'])
              ? $this->filterUserInfoComponent($parts['user'])
              : '';
          $this->host = isset($parts['host'])
              ? $this->filterHost($parts['host'])
              : '';
          $this->port = isset($parts['port'])
              ? $this->filterPort($parts['port'])
              : null;
          $this->path = isset($parts['path'])
              ? $this->filterPath($parts['path'])
              : '';
          $this->query = isset($parts['query'])
              ? $this->filterQueryAndFragment($parts['query'])
              : '';
          $this->fragment = isset($parts['fragment'])
              ? $this->filterQueryAndFragment($parts['fragment'])
              : '';
          if (isset($parts['pass'])) {
              $this->userInfo .= ':' . $this->filterUserInfoComponent($parts['pass']);
          }

          $this->removeDefaultPort();
      }

      /**
       * Validates the scheme type and lowercases ASCII letters.
       *
       * @param mixed $scheme
       *
       * @throws \InvalidArgumentException If the scheme is invalid.
       */
      private function filterScheme($scheme): string
      {
          if (!is_string($scheme)) {
              throw new \InvalidArgumentException('Scheme must be a string');
          }

          // strtr() instead of strtolower(): maps A-Z only, independent of the locale.
          return \strtr($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
      }

      /**
       * Percent-encodes a user or password component.
       *
       * Characters outside the unreserved and sub-delims sets are encoded, as is
       * any "%" that does not start a valid percent-encoded triplet; existing
       * valid triplets are left untouched.
       *
       * @param mixed $component
       *
       * @throws \InvalidArgumentException If the user info is invalid.
       */
      private function filterUserInfoComponent($component): string
      {
          if (!is_string($component)) {
              throw new \InvalidArgumentException('User info must be a string');
          }

          return preg_replace_callback(
              '/(?:[^%' . self::CHAR_UNRESERVED . self::CHAR_SUB_DELIMS . ']+|%(?![A-Fa-f0-9]{2}))/',
              [$this, 'rawurlencodeMatchZero'],
              $component
          );
      }

      /**
       * Validates the host type and lowercases ASCII letters.
       *
       * @param mixed $host
       *
       * @throws \InvalidArgumentException If the host is invalid.
       */
      private function filterHost($host): string
      {
          if (!is_string($host)) {
              throw new \InvalidArgumentException('Host must be a string');
          }

          // strtr() instead of strtolower(): maps A-Z only, independent of the locale.
          return \strtr($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
      }

      /**
       * Casts the port to int and checks its range; null passes through unchanged.
       *
       * @param mixed $port
       *
       * @throws \InvalidArgumentException If the port is invalid.
       */
      private function filterPort($port): ?int
      {
          if ($port === null) {
              return null;
          }

          $port = (int) $port;
          if (0 > $port || 0xffff < $port) {
              throw new \InvalidArgumentException(
                  sprintf('Invalid port: %d. Must be between 0 and 65535', $port)
              );
          }

          return $port;
      }

      /**
       * Returns the "&"-separated parts of the URI's query whose key is not in $keys.
       *
       * Keys are compared after rawurldecode() on both sides. The returned array
       * keeps the original positions as keys (array_filter() does not reindex).
       *
       * @param string[] $keys
       *
       * @return string[]
       */
      private static function getFilteredQueryString(UriInterface $uri, array $keys): array
      {
          $current = $uri->getQuery();

          if ($current === '') {
              return [];
          }

          $decodedKeys = array_map('rawurldecode', $keys);

          return array_filter(explode('&', $current), static function ($part) use ($decodedKeys) {
              return !in_array(rawurldecode(explode('=', $part)[0]), $decodedKeys, true);
          });
      }

      /**
       * Builds a single "key" or "key=value" query part.
       *
       * @param string      $key   Query key.
       * @param string|null $value Query value; null produces the key without "=".
       */
      private static function generateQueryString(string $key, ?string $value): string
      {
          // Query string separators ("=", "&") within the key or value need to be encoded
          // (while preventing double-encoding) before setting the query string. All other
          // chars that need percent-encoding will be encoded by withQuery().
          $queryString = strtr($key, self::QUERY_SEPARATORS_REPLACEMENT);

          if ($value !== null) {
              $queryString .= '=' . strtr($value, self::QUERY_SEPARATORS_REPLACEMENT);
          }

          return $queryString;
      }

      /**
       * Resets the port to null when it equals the default port of the current scheme.
       */
      private function removeDefaultPort(): void
      {
          if ($this->port !== null && self::isDefaultPort($this)) {
              $this->port = null;
          }
      }

      /**
       * Filters the path of a URI
       *
       * Percent-encodes every character that is not unreserved, a sub-delim,
       * ":", "@" or "/", plus any "%" that does not start a valid triplet.
       *
       * @param mixed $path
       *
       * @throws \InvalidArgumentException If the path is invalid.
       */
      private function filterPath($path): string
      {
          if (!is_string($path)) {
              throw new \InvalidArgumentException('Path must be a string');
          }

          return preg_replace_callback(
              '/(?:[^' . self::CHAR_UNRESERVED . self::CHAR_SUB_DELIMS . '%:@\/]++|%(?![A-Fa-f0-9]{2}))/',
              [$this, 'rawurlencodeMatchZero'],
              $path
          );
      }

      /**
       * Filters the query string or fragment of a URI.
       *
       * Same encoding rules as filterPath(), except that "?" is also left unencoded.
       *
       * @param mixed $str
       *
       * @throws \InvalidArgumentException If the query or fragment is invalid.
       */
      private function filterQueryAndFragment($str): string
      {
          if (!is_string($str)) {
              throw new \InvalidArgumentException('Query and fragment must be a string');
          }

          return preg_replace_callback(
              '/(?:[^' . self::CHAR_UNRESERVED . self::CHAR_SUB_DELIMS . '%:@\/\?]++|%(?![A-Fa-f0-9]{2}))/',
              [$this, 'rawurlencodeMatchZero'],
              $str
          );
      }

      /**
       * preg_replace_callback() callback: percent-encodes the whole match.
       *
       * @param array $match Regex match; index 0 holds the matched text.
       */
      private function rawurlencodeMatchZero(array $match): string
      {
          return rawurlencode($match[0]);
      }

      /**
       * Normalizes and validates the combination of components.
       *
       * Applies the default host to http(s) URIs without a host, then rejects
       * paths that would be ambiguous when the URI has no authority.
       *
       * @throws MalformedUriException If the components do not form a valid URI.
       */
      private function validateState(): void
      {
          if ($this->host === '' && ($this->scheme === 'http' || $this->scheme === 'https')) {
              $this->host = self::HTTP_DEFAULT_HOST;
          }

          if ($this->getAuthority() === '') {
              if (0 === strpos($this->path, '//')) {
                  throw new MalformedUriException('The path of a URI without an authority must not start with two slashes "//"');
              }
              // Without a scheme, a colon in the first path segment would be read as a scheme delimiter.
              if ($this->scheme === '' && false !== strpos(explode('/', $this->path, 2)[0], ':')) {
                  throw new MalformedUriException('A relative URI must not have a path beginning with a segment containing a colon');
              }
          }
      }
  }