529 lines
13 KiB
PHP
529 lines
13 KiB
PHP
|
<?php
|
||
|
|
||
|
namespace Sphere\Debloat\RemoveCss;
|
||
|
|
||
|
use Sabberworm\CSS\CSSList\AtRuleBlockList;
|
||
|
use Sabberworm\CSS\CSSList\CSSBlockList;
|
||
|
use Sabberworm\CSS\CSSList\Document;
|
||
|
use Sabberworm\CSS\OutputFormat;
|
||
|
use Sabberworm\CSS\Parser as CSSParser;
|
||
|
use Sabberworm\CSS\RuleSet\DeclarationBlock;
|
||
|
use Sabberworm\CSS\Settings;
|
||
|
use Sabberworm\CSS\Value\URL;
|
||
|
|
||
|
use Sphere\Debloat\OptimizeCss\Stylesheet;
|
||
|
use Sphere\Debloat\Util;
|
||
|
|
||
|
/**
 * Sanitizer removes the unnecessary CSS, provided a stylesheet.
 *
 * The stylesheet is parsed once into a plain array structure (stored back on the sheet as
 * `parsed_data`). Rules are then rendered only when at least one of their selectors
 * references classes, ids and tags marked as used, or matches an allow-list entry.
 *
 * @author  asadkn
 * @since   1.0.0
 */
class Sanitizer
{
    /**
     * Stylesheet being sanitized.
     *
     * @var Stylesheet
     */
    protected $sheet;

    /**
     * Raw CSS content taken from the stylesheet.
     *
     * @var string
     */
    protected $css;

    /**
     * Data handed over via set_cache(). This class only stores it and never reads it.
     *
     * Declared explicitly because writing to an undeclared property is deprecated as of
     * PHP 8.2. Kept public since the previously created dynamic property was public too.
     *
     * @var mixed
     */
    public $cache;

    /**
     * Markup found in the document; targets are looked up as array keys.
     *
     * @var array
     */
    protected $used_markup = [
        'classes' => [],
        'tags'    => [],
        'ids'     => [],
    ];

    /**
     * Allow-list rules. Pre-processed by process_allowed_selectors().
     *
     * @var array
     */
    protected $allow_selectors = [];

    /**
     * @param Stylesheet $sheet
     * @param array      $used_markup {
     *     @type array $classes
     *     @type array $tags
     *     @type array $ids
     * }
     * @param array      $allow Allow-list rules; see should_include() for the keys that are read.
     */
    public function __construct(Stylesheet $sheet, array $used_markup, $allow = [])
    {
        $this->sheet = $sheet;
        $this->css   = $sheet->content;

        // Keys missing from the provided markup keep their empty defaults.
        $this->used_markup     = array_replace($this->used_markup, $used_markup);
        $this->allow_selectors = $allow;
    }

    /**
     * Store arbitrary data on the instance.
     *
     * @param mixed $data
     * @return void
     */
    public function set_cache($data)
    {
        $this->cache = $data;
    }

    /**
     * Produce the reduced CSS for the stylesheet.
     *
     * Parses the CSS only when the sheet carries no (or empty) `parsed_data`; the
     * transformed structure is stored back on the sheet afterwards.
     *
     * @return string
     */
    public function sanitize()
    {
        $data = $this->sheet->parsed_data ?: [];

        if (!$data) {
            // Strip the dreaded UTF-8 byte order mark (BOM, \uFEFF).
            // Ref: https://github.com/sabberworm/PHP-CSS-Parser/issues/150
            // The cast avoids handing null to preg_replace() when the sheet has no content.
            $this->css = preg_replace('/^\xEF\xBB\xBF/', '', (string) $this->css);

            $config = Settings::create()->withMultibyteSupport(false);
            $parser = new CSSParser($this->css, $config);
            $parsed = $parser->parse();

            // Fix relative URLs.
            $this->convert_urls($parsed);

            $data = $this->transform_data($parsed);
            $this->sheet->parsed_data = $data;
        }

        $this->process_allowed_selectors();

        return $this->render_css($data);
    }

    /**
     * Convert relative URLs to full URLs for inline inclusion or changed paths.
     *
     * Only document-relative URLs (e.g. `img/a.png`, `../a.png`) are rewritten. URLs that
     * carry a scheme or host, root-relative paths and URLs without a path are left alone.
     *
     * @param Document $data
     * @return void
     */
    public function convert_urls(Document $data)
    {
        $base_url = $this->get_base_url($this->sheet->url);

        foreach ($data->getAllValues() as $value) {
            if (!($value instanceof URL)) {
                continue;
            }

            $url = $value->getURL()->getString();

            // Fast path: absolute http(s) URLs and data URIs.
            if (preg_match('/^(https?|data):/', $url)) {
                continue;
            }

            $parsed_url = parse_url($url);

            // parse_url() returns false on seriously malformed URLs; leave those untouched.
            if (!is_array($parsed_url)) {
                continue;
            }

            // Skip anything with a scheme (about:, blob:, DATA:, ...), known hosts and
            // protocol-relative URLs, root-relative paths, and path-less URLs such as "#id".
            if (
                !empty($parsed_url['scheme'])
                || !empty($parsed_url['host'])
                || empty($parsed_url['path'])
                || $parsed_url['path'][0] === '/'
            ) {
                continue;
            }

            $value->getURL()->setString($base_url . $url);
        }
    }

    /**
     * Get the directory part of a stylesheet URL, with a trailing slash.
     *
     * Works whether or not the URL has a query string or fragment, e.g. both
     * `https://e.com/css/a.css` and `https://e.com/css/a.css?ver=1` give `https://e.com/css/`.
     *
     * @param string $url
     * @return string Empty string when the URL has no directory part at all.
     */
    protected function get_base_url($url)
    {
        $url = (string) $url;

        // Drop the query string and fragment.
        $url = (string) substr($url, 0, strcspn($url, '?#'));

        // Keep "scheme://host" (or "//host") out of the way, so its slashes are never
        // mistaken for a path separator.
        $authority = '';
        if (preg_match('#^(?:[a-z][a-z0-9+.-]*:)?//[^/]*#i', $url, $match)) {
            $authority = $match[0];
        }

        $path       = (string) substr($url, strlen($authority));
        $last_slash = strrpos($path, '/');
        $directory  = $last_slash === false ? '' : substr($path, 0, $last_slash + 1);

        // A bare host has no path; relative assets then resolve against its root.
        if ($authority !== '' && $directory === '') {
            $directory = '/';
        }

        return $authority . $directory;
    }

    /**
     * Transform data structure to store in our format. This data will be used without
     * loading CSS Parser on further requests.
     *
     * Each item is either `['rulesets' => array, 'at_rule' => string]` for an at-rule block
     * list, or `['css' => string]` with an extra `selectors` key for declaration blocks.
     *
     * @param CSSBlockList $data
     * @return array
     */
    public function transform_data(CSSBlockList $data)
    {
        $items = [];

        foreach ($data->getContents() as $content) {
            // At-rule block lists are stored as nested rulesets, converted recursively.
            if ($content instanceof AtRuleBlockList) {
                $items[] = [
                    'rulesets' => $this->transform_data($content),
                    'at_rule'  => "@{$content->atRuleName()} {$content->atRuleArgs()}",
                ];

                continue;
            }

            $item = [
                'css' => $content->render(OutputFormat::createCompact()),
            ];

            if ($content instanceof DeclarationBlock) {
                $item['selectors'] = $this->parse_selectors($content->getSelectors());
            }

            $items[] = $item;
        }

        return $items;
    }

    /**
     * Parse selectors to get classes, id, tags and attrs.
     *
     * @param array $selectors Objects that provide __toString().
     * @return array One entry per selector with the non-empty keys among `classes`, `ids`,
     *               `tags`, `attrs` and `selector` (the trimmed original selector string).
     */
    protected function parse_selectors($selectors)
    {
        // Collects capture group $group of every $pattern match, then removes the matches
        // from $subject so that later extractions cannot see them again.
        $extract = function ($pattern, &$subject, $group) {
            $found = [];
            if (preg_match_all($pattern, $subject, $hits)) {
                $found = $hits[$group];
            }

            $subject = preg_replace($pattern, '', $subject);

            return $found;
        };

        $selectors_data = [];

        foreach ($selectors as $selector_object) {
            $selector = $selector_object->__toString();
            $original = trim($selector);

            // Based on AMP plugin.
            // Handle :not() and pseudo selectors to eliminate false negatives.
            $selector = preg_replace('/(?<!\\\\)::?[a-zA-Z0-9_-]+(\(.+?\))?/', '', $selector);

            // Get attributes but remove them from the selector to prevent false positives
            // from within attribute selector.
            $attrs = $extract('/\[([A-Za-z0-9_:-]+)(\W?=[^\]]+)?\]/', $selector, 1);

            // Class names. The `\\\\.` will allow any char via escaping, like the colon
            // in `.lg\:w-full`.
            $classes = array_map(
                'stripslashes',
                $extract('/\.((?:[a-zA-Z0-9_-]+|\\\\.)+)/', $selector, 1)
            );

            $ids  = $extract('/#([a-zA-Z0-9_-]+)/', $selector, 1);

            // Whatever word-like tokens remain are tag names.
            $tags = $extract('/[a-zA-Z0-9_-]+/', $selector, 0);

            // array_filter() drops the keys that ended up empty.
            $selectors_data[] = array_filter([
                'classes'  => $classes,
                'ids'      => $ids,
                'tags'     => $tags,
                'attrs'    => $attrs,
                'selector' => $original,
            ]);
        }

        return array_filter($selectors_data);
    }

    /**
     * Render the stored data structure back to a CSS string.
     *
     * Items without selectors are always rendered. Items with selectors are rendered when
     * at least one selector passes should_include(). At-rule blocks are rendered only when
     * something inside them was rendered.
     *
     * @param array $data Structure as produced by transform_data().
     * @return string
     */
    public function render_css($data)
    {
        $rendered = [];

        foreach ($data as $item) {
            // Has CSS.
            if (isset($item['css'])) {
                $should_render = !isset($item['selectors']);

                if (!$should_render) {
                    // One included selector is enough, so stop at the first hit.
                    foreach ($item['selectors'] as $selector) {
                        if ($this->should_include($selector)) {
                            $should_render = true;
                            break;
                        }
                    }
                }

                if ($should_render) {
                    $rendered[] = $item['css'];
                }

                continue;
            }

            // Nested ruleset.
            if (empty($item['rulesets'])) {
                continue;
            }

            $child_rulesets = $this->render_css($item['rulesets']);
            if ($child_rulesets) {
                $rendered[] = sprintf('%s { %s }', $item['at_rule'], $child_rulesets);
            }
        }

        return implode("", $rendered);
    }

    /**
     * Pre-process allowed selectors.
     *
     * Convert data structures in proper format, mainly for performance: rules limited to
     * another sheet are dropped, and `search` / `search_regex` are merged into a single
     * `computed_search_regex`.
     *
     * @return void
     */
    protected function process_allowed_selectors()
    {
        foreach ($this->allow_selectors as $key => $value) {
            // Check if selector rule valid for current sheet.
            if (isset($value['sheet']) && !Util\asset_match($value['sheet'], $this->sheet)) {
                unset($this->allow_selectors[$key]);
                continue;
            }

            $value = $this->add_search_regex($value);
            $regex = $value['search_regex'] ?? '';

            // Pre-compute the matching regex for performance.
            if (isset($value['search'])) {
                $value['search'] = array_filter((array) $value['search']);

                // If we still have something.
                if ($value['search']) {
                    // The pattern is wrapped in '#' delimiters by should_include(), so the
                    // delimiter has to be quoted too; PHP < 7.3 does not quote '#' by default.
                    $quoted = array_map(
                        function ($search) {
                            return preg_quote($search, '#');
                        },
                        $value['search']
                    );

                    // A literal search must be followed by whitespace, '.', ':', ',', '['
                    // or the end of the selector.
                    $loose_regex = '(' . implode('|', $quoted) . ')(?=\s|\.|\:|,|\[|$)';

                    // Combine with search_regex if available.
                    $regex = $regex ? "($loose_regex|$regex)" : $loose_regex;
                }
            }

            if ($regex) {
                $value['computed_search_regex'] = $regex;
            }

            $this->allow_selectors[$key] = $value;
        }
    }

    /**
     * Add search regex to array by converting asterisks to proper regex search.
     *
     * Search entries containing an asterisk are moved out of `search` and combined into
     * `search_regex`. A rule that already has a `search_regex` is returned unchanged.
     *
     * @param array $value
     * @return array
     */
    protected function add_search_regex(array $value)
    {
        if (isset($value['search_regex']) || !isset($value['search'])) {
            return $value;
        }

        $value['search'] = (array) $value['search'];
        $regex = [];

        foreach ($value['search'] as $key => $search) {
            if (strpos($search, '*') === false) {
                continue;
            }

            $search = trim($search);

            // Optimize regex for starting.
            // Note: Ending asterisk removal isn't necessary. PCRE engine is optimized for that.
            $has_first_asterisk = 0;
            $search = preg_replace('/^\*(.+?)/', '\\1', $search, 1, $has_first_asterisk);

            // Quote for the '#' delimiter used by should_include(); PHP < 7.3 does not
            // quote '#' unless asked to.
            $search = preg_quote($search, '#');

            // 1. Space and asterisk matches a class itself, followed by space (child), or comma separator.
            // 2. Only asterisk is considered more of a prefix/suffix and .class* will match .classname too.
            $search = str_replace(' \*', '(\s|$|,|\:).*?', $search);
            $search = str_replace('\*', '.*?', $search);

            // Note: To prevent ^(.*?) which is slow, we add starting position match only
            // if the search doesn't start with asterisk match.
            $regex[] = ($has_first_asterisk ? '' : '^') . $search;

            unset($value['search'][$key]);
        }

        if ($regex) {
            $value['search_regex'] = '(' . implode('|', $regex) . ')';
        }

        return $value;
    }

    /**
     * Whether to include a selector in the output.
     *
     * Allow-list rules are checked in order. The keys read from a rule are `type`
     * ('prefix', 'class' or anything else), `class` and `computed_search_regex`.
     *
     * @param array $selector {
     *     @type string[]|null $classes
     *     @type string[]|null $ids
     *     @type string[]|null $tags
     *     @type string[]|null $attrs
     *     @type string        $selector
     * }
     * @return boolean
     */
    public function should_include($selector)
    {
        $selector_text = $selector['selector'] ?? '';

        // :root is always valid.
        // Note: Selectors of type `:root .class` will not match this but will be validated below
        // if .class is used, as intended.
        if ($selector_text === ':root') {
            return true;
        }

        // If it's an attribute selector with nothing else, it should be kept. Perhaps *[attr] or [attr].
        if (
            !empty($selector['attrs'])
            && empty($selector['classes'])
            && empty($selector['ids'])
            && empty($selector['tags'])
        ) {
            return true;
        }

        // Check allow list.
        // @todo move to cached pre-processed. Clear on settings change.
        if ($this->allow_selectors) {
            foreach ($this->allow_selectors as $include) {
                $type = $include['type'] ?? '';

                /**
                 * Prefix-based + all other classes/tags/etc. in selector exist in doc.
                 *
                 * Note: It's basically to ignore the first class and include the sub-classes based
                 * on their existence in doc. Example: .scheme-dark or .scheme-light.
                 */
                if ($type === 'prefix') {
                    $class = (string) ($include['class'] ?? '');
                    if ($class === '') {
                        continue;
                    }

                    $prefix = '.' . $class;

                    // Check if exact match.
                    if ($prefix === $selector_text) {
                        return true;
                    }

                    // Check if first class matches. The leading dot is part of the comparison
                    // so that `#name` is not mistaken for the class `.name`.
                    if (strncmp($selector_text, $prefix, strlen($prefix)) === 0) {
                        // Will check for validity later below. Remove first class as it's allowed.
                        if (isset($selector['classes'])) {
                            $selector['classes'] = array_diff($selector['classes'], [$class]);
                        }

                        // WARNING: Due to this break, if there's a rule to allow all selectors of this prefix
                        // that appear later, it won't be validated.
                        // @todo Sort prefixes to be at the end or run them later.
                        break;
                    }

                    continue;
                }

                // Check if a class exists in document.
                if ($type === 'class' && !$this->is_used($include['class'] ?? null, 'classes')) {
                    continue;
                }

                // Pre-computed regex - combined 'search' and 'search_regex'.
                if (
                    !empty($include['computed_search_regex'])
                    && preg_match('#' . $include['computed_search_regex'] . '#', $selector_text)
                ) {
                    return true;
                }
            }
        }

        // Every class, id and tag the selector refers to has to be used.
        foreach (['classes', 'ids', 'tags'] as $markup_type) {
            if (!empty($selector[$markup_type]) && !$this->is_used($selector[$markup_type], $markup_type)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Test if a selector classes, ids, or tags are used in the doc (provided in $this->used_markup).
     *
     * Targets are looked up as keys of the used-markup list for the given type, and all of
     * them have to be present.
     *
     * @param string|array $targets
     * @param string       $type 'classes', 'tags', or 'ids'.
     * @return boolean False when no type is given or any target is missing.
     */
    public function is_used($targets, $type = '')
    {
        if (!$type) {
            return false;
        }

        foreach ((array) $targets as $target) {
            // All targets must exist.
            if (!isset($this->used_markup[$type][$target])) {
                return false;
            }
        }

        return true;
    }
}
|