get_xsl_url() );
		// NOTE(review): the default stylesheet declaration is empty in this view, so get_output()
		// only emits one after set_stylesheet() is called. Presumably it should be built from the
		// XSL URL computed above — confirm against the start of this constructor.
		$this->stylesheet     = '';
		$this->charset        = get_bloginfo( 'charset' );
		$this->output_charset = $this->charset;

		// Switch the output to UTF-8 when the blog charset differs and mbstring lists that charset.
		if (
			$this->charset !== 'UTF-8'
			&& function_exists( 'mb_list_encodings' )
			&& in_array( $this->charset, mb_list_encodings(), true )
		) {
			$this->output_charset = 'UTF-8';
		}

		$this->needs_conversion = $this->output_charset !== $this->charset;
	}

	/**
	 * Builds the sitemap index.
	 *
	 * Wraps one `<sitemap>` entry per link in a `<sitemapindex>` root element.
	 *
	 * @param array $links Set of sitemaps index links.
	 *
	 * @return string
	 */
	public function get_index( $links ) {

		$xml = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

		foreach ( $links as $link ) {
			$xml .= $this->sitemap_index_url( $link );
		}

		/**
		 * Filter to append sitemaps to the index.
		 *
		 * @param string $index String to append to sitemaps index, defaults to empty.
		 */
		$xml .= apply_filters( 'wpseo_sitemap_index', '' );
		$xml .= '</sitemapindex>';

		return $xml;
	}

	/**
	 * Builds the sitemap.
	 *
	 * Wraps one `<url>` entry per link in a `<urlset>` root element. The image namespace is
	 * declared on the root because sitemap_url() emits `<image:image>` children.
	 *
	 * @param array  $links        Set of sitemap links.
	 * @param string $type         Sitemap type.
	 * @param int    $current_page Current sitemap page number. Compared strictly against integer 1.
	 *
	 * @return string
	 */
	public function get_sitemap( $links, $type, $current_page ) {

		$urlset = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
			. 'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" '
			. 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 '
			. 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd '
			. 'http://www.google.com/schemas/sitemap-image/1.1 '
			. 'http://www.google.com/schemas/sitemap-image/1.1/sitemap-image.xsd" '
			. 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

		/**
		 * Filters the `urlset` for a sitemap by type.
		 *
		 * @api string $urlset The output for the sitemap's `urlset`.
		 */
		$xml = apply_filters( "wpseo_sitemap_{$type}_urlset", $urlset );

		foreach ( $links as $url ) {
			$xml .= $this->sitemap_url( $url );
		}

		/**
		 * Filter to add extra URLs to the XML sitemap by type.
		 *
		 * Only runs for the first page, not on all.
		 *
		 * @param string $content String content to add, defaults to empty.
		 */
		if ( $current_page === 1 ) {
			$xml .= apply_filters( "wpseo_sitemap_{$type}_content", '' );
		}

		$xml .= '</urlset>';

		return $xml;
	}

	/**
	 * Produce the final XML output.
	 *
	 * Prepends the XML declaration (using the output charset) and, when a stylesheet
	 * declaration is set, the filtered stylesheet declaration, then appends the sitemap
	 * followed by a trailing newline.
	 *
	 * @param string $sitemap Sitemap XML.
	 *
	 * @return string
	 */
	public function get_output( $sitemap ) {

		$output = '<?xml version="1.0" encoding="' . esc_attr( $this->output_charset ) . '"?>';

		if ( $this->stylesheet ) {
			/**
			 * Filter the stylesheet URL for the XML sitemap.
			 *
			 * @param string $stylesheet Stylesheet URL.
			 */
			$output .= apply_filters( 'wpseo_stylesheet_url', $this->stylesheet ) . "\n";
		}

		$output .= $sitemap;
		$output .= "\n";

		return $output;
	}

	/**
	 * Get charset for the output.
	 *
	 * @return string
	 */
	public function get_output_charset() {
		return $this->output_charset;
	}

	/**
	 * Set a custom stylesheet for this sitemap. Set to empty to just remove the default stylesheet.
	 *
	 * @param string $stylesheet Full XML-stylesheet declaration.
	 */
	public function set_stylesheet( $stylesheet ) {
		$this->stylesheet = $stylesheet;
	}

	/**
	 * Build the `<sitemap>` tag for a given URL.
	 *
	 * Reads `$url['loc']` (required) and `$url['lastmod']` (optional); `<lastmod>` is only
	 * emitted when a non-empty formatted date is available.
	 *
	 * @param array $url Array of parts that make up this entry.
	 *
	 * @return string
	 */
	protected function sitemap_index_url( $url ) {

		$date = null;

		if ( ! empty( $url['lastmod'] ) ) {
			$date = YoastSEO()->helpers->date->format( $url['lastmod'] );
		}

		// The final `false` disables double encoding, so existing entities in the URL are left intact.
		$url['loc'] = htmlspecialchars( $url['loc'], ENT_COMPAT, $this->output_charset, false );

		$output  = "\t<sitemap>\n";
		$output .= "\t\t<loc>" . $url['loc'] . "</loc>\n";
		$output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date, ENT_COMPAT, $this->output_charset, false ) . "</lastmod>\n";
		$output .= "\t</sitemap>\n";

		return $output;
	}

	/**
	 * Build the `<url>` tag for a given URL.
	 *
	 * Reads `$url['loc']` (required), `$url['mod']` (optional) and `$url['images']`
	 * (optional list of arrays with a `src` key). Images without a `src` are skipped.
	 *
	 * Public access for backwards compatibility reasons.
	 *
	 * @param array $url Array of parts that make up this entry.
	 *
	 * @return string
	 */
	public function sitemap_url( $url ) {

		$date = null;

		if ( ! empty( $url['mod'] ) ) {
			// Create a DateTime object date in the correct timezone.
			$date = YoastSEO()->helpers->date->format( $url['mod'] );
		}

		$output  = "\t<url>\n";
		$output .= "\t\t<loc>" . $this->encode_and_escape( $url['loc'] ) . "</loc>\n";
		$output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date, ENT_COMPAT, $this->output_charset, false ) . "</lastmod>\n";

		if ( empty( $url['images'] ) ) {
			$url['images'] = [];
		}

		foreach ( $url['images'] as $img ) {

			if ( empty( $img['src'] ) ) {
				continue;
			}

			$output .= "\t\t<image:image>\n";
			$output .= "\t\t\t<image:loc>" . $this->encode_and_escape( $img['src'] ) . "</image:loc>\n";
			$output .= "\t\t</image:image>\n";
		}
		unset( $img );

		$output .= "\t</url>\n";

		/**
		 * Filters the output for the sitemap URL tag.
		 *
		 * @api string $output The output for the sitemap url tag.
		 *
		 * @param array $url The sitemap URL array on which the output is based.
		 */
		return apply_filters( 'wpseo_sitemap_url', $output, $url );
	}

	/**
	 * Ensure the URL is encoded per RFC3986 and correctly escaped for use in an XML sitemap.
	 *
	 * This method works around two quirks in esc_url():
	 * 1. `esc_url()` leaves schema-relative URLs alone, while according to the sitemap specs,
	 *    the URL must always begin with a protocol.
	 * 2. `esc_url()` escapes ampersands as `&#038;` instead of the more common `&amp;`.
	 *    According to the specs, `&amp;` should be used, and even though this shouldn't
	 *    really make a difference in practice, to quote Jono: "I'd be nervous about `&#038;`
	 *    given how many weird and wonderful things eat sitemaps", so better safe than sorry.
	 *    Single quotes get the same treatment: `&#039;` becomes `&apos;`.
	 *
	 * @link https://www.sitemaps.org/protocol.html#xmlTagDefinitions
	 * @link https://www.sitemaps.org/protocol.html#escaping
	 * @link https://developer.wordpress.org/reference/functions/esc_url/
	 *
	 * @param string $url URL to encode and escape.
	 *
	 * @return string
	 */
	protected function encode_and_escape( $url ) {
		$url = $this->encode_url_rfc3986( $url );
		$url = esc_url( $url );

		// Swap the numeric entities produced by esc_url() for the named entities the sitemap protocol lists.
		$url = str_replace( '&#038;', '&amp;', $url );
		$url = str_replace( '&#039;', '&apos;', $url );

		if ( strpos( $url, '//' ) === 0 ) {
			// Schema-relative URL for which esc_url() does not add a scheme.
			$url = 'http:' . $url;
		}

		return $url;
	}

	/**
	 * Apply some best effort conversion to comply with RFC3986.
	 *
	 * URLs that already pass FILTER_VALIDATE_URL are returned untouched. Otherwise each path
	 * segment is decoded and re-encoded, and the query string is rebuilt with RFC3986 encoding.
	 *
	 * @param string $url URL to encode.
	 *
	 * @return string
	 */
	protected function encode_url_rfc3986( $url ) {

		if ( filter_var( $url, FILTER_VALIDATE_URL ) ) {
			return $url;
		}

		$path = wp_parse_url( $url, PHP_URL_PATH );

		if ( ! empty( $path ) && $path !== '/' ) {
			$encoded_path = explode( '/', $path );

			// First decode the path, to prevent double encoding.
			$encoded_path = array_map( 'rawurldecode', $encoded_path );

			$encoded_path = array_map( 'rawurlencode', $encoded_path );
			$encoded_path = implode( '/', $encoded_path );

			$url = str_replace( $path, $encoded_path, $url );
		}

		$query = wp_parse_url( $url, PHP_URL_QUERY );

		if ( ! empty( $query ) ) {

			parse_str( $query, $parsed_query );

			$parsed_query = http_build_query( $parsed_query, '', '&', PHP_QUERY_RFC3986 );

			$url = str_replace( $query, $parsed_query, $url );
		}

		return $url;
	}

	/**
	 * Retrieves the XSL URL that should be used in the current environment
	 *
	 * When home_url and site_url are not the same, the home_url should be used.
	 * This is because the XSL needs to be served from the same domain, protocol and port
	 * as the XML file that is loading it.
	 *
	 * @return string The XSL URL that needs to be used.
	 */
	protected function get_xsl_url() {
		if ( home_url() !== site_url() ) {
			return home_url( 'main-sitemap.xsl' );
		}

		/*
		 * Fallback to circumvent a cross-domain security problem when the XSL file is
		 * loaded from a different (sub)domain.
		 */
		if ( strpos( plugins_url(), home_url() ) !== 0 ) {
			return home_url( 'main-sitemap.xsl' );
		}

		return plugin_dir_url( WPSEO_FILE ) . 'css/main-sitemap.xsl';
	}
}