<?php

namespace Sphere\Debloat\RemoveCss;

use Sabberworm\CSS\CSSList\AtRuleBlockList;
use Sabberworm\CSS\CSSList\CSSBlockList;
use Sabberworm\CSS\CSSList\Document;
use Sabberworm\CSS\OutputFormat;
use Sabberworm\CSS\Parser as CSSParser;
use Sabberworm\CSS\RuleSet\DeclarationBlock;
use Sabberworm\CSS\Settings;
use Sabberworm\CSS\Value\URL;

use Sphere\Debloat\OptimizeCss\Stylesheet;
use Sphere\Debloat\Util;

/**
 * Sanitizer removes the unnecessary CSS, provided a stylesheet.
 * 
 * @author  asadkn
 * @since   1.0.0
 */
class Sanitizer
{
	/**
	 * @var Stylesheet
	 */
	protected $sheet;

	protected $css;
	protected $used_markup = [
		'classes' => [],
		'tags'    => [],
		'ids'     => []
	];

	protected $allow_selectors = [];

	/**
	 * @param Stylesheet $sheet
	 * @param array $used_markup {
	 *     @type array $classes
	 *     @type array $tags
	 *     @type array $ids
	 * }
	 */
	public function __construct(Stylesheet $sheet, array $used_markup, $allow = [])
	{
		$this->sheet       = $sheet;
		$this->css         = $sheet->content;
		$this->used_markup = array_replace($this->used_markup, $used_markup);

		$this->allow_selectors = $allow;

	}

	public function set_cache($data)
	{
		$this->cache = $data;
	}

	public function sanitize() 
	{
		$data = $this->sheet->parsed_data ?: [];
		if (!$data) {

			// Strip the dreaded UTF-8 byte order mark (BOM, \uFEFF). Ref: https://github.com/sabberworm/PHP-CSS-Parser/issues/150
			$this->css = preg_replace('/^\xEF\xBB\xBF/', '', $this->css);

			$config = Settings::create()->withMultibyteSupport(false);
			$parser = new CSSParser($this->css, $config);
			$parsed = $parser->parse();

			// Fix relative URLs.
			$this->convert_urls($parsed);
			$data = $this->transform_data($parsed);

			$this->sheet->parsed_data = $data;
		}

		$this->process_allowed_selectors();

		return $this->render_css($data);
	}

	/**
	 * Convert relative URLs to full URLs for inline inclusion or changed paths.
	 *
	 * @param Document $data
	 * @return void
	 */
	public function convert_urls(Document $data)
	{
		$base_url = preg_replace('#[^/]+\?.*$#', '', $this->sheet->url);

		$values = $data->getAllValues();
		foreach ($values as $value) {
			if (!($value instanceof URL)) {
				continue;
			}

			$url = $value->getURL()->getString();
			// if (substr($url, 0, 5) === 'data:') {
			// 	continue;
			// }
			if (preg_match('/^(https?|data):/', $url)) {
				continue;
			}

			$parsed_url = parse_url($url);

			// Skip known host and protocol-relative paths.
			if (!empty($parsed_url['host']) || empty($parsed_url['path']) || $parsed_url['path'][0] === '/') {
				continue;
			}

			$new_url = $base_url . $url;
			$value->getUrl()->setString($new_url);
		}
	}

	/**
	 * Transform data structure to store in our format. This data will be used without 
	 * loading CSS Parser on further requests.
	 *
	 * @param CSSBlockList $data
	 * @return array
	 */
	public function transform_data(CSSBlockList $data)
	{
		$items = [];
		foreach ($data->getContents() as $content) {
			if ($content instanceof AtRuleBlockList) {
				$items[] = [
					'rulesets' => $this->transform_data($content),
					'at_rule'  => "@{$content->atRuleName()} {$content->atRuleArgs()}",
				];
			}
			else {
				$item = [
					//'css' => $content->render(OutputFormat::createPretty())
					'css' => $content->render(OutputFormat::createCompact())
				];

				if ($content instanceof DeclarationBlock) {
					$item['selectors'] = $this->parse_selectors($content->getSelectors());
				}

				$items[] = $item;
			}
		}

		return $items;
	}

	/**
	 * Parse selectors to get classes, id, tags and attrs.
	 *
	 * @param array $selectors
	 * @return array
	 */
	protected function parse_selectors($selectors)
	{
		$selectors = array_map(
			function($sel) {
				return $sel->__toString();
			},
			$selectors
		);

		$selectors_data = [];
		foreach ($selectors as $selector) {
			$data = [
				'classes'  => [],
				'ids'      => [],
				'tags'     => [],
				// 'pseudo'   => [],
				'attrs'    => [],
				'selector' => trim($selector),
			];

			// if (strpos($selector, ':root') !== false) {
			// 	$data['pseudo'][':root'] = 1;
			// }

			// Based on AMP plugin.
			// Handle :not() and pseudo selectors to eliminate false negatives.
			$selector = preg_replace('/(?<!\\\\)::?[a-zA-Z0-9_-]+(\(.+?\))?/', '', $selector);

			// Get attributes but remove them from the selector to prevent false positives 
			// from within attribute selector.
			$selector = preg_replace_callback(
				'/\[([A-Za-z0-9_:-]+)(\W?=[^\]]+)?\]/', 
				function($matches) use (&$data) {
					$data['attrs'][] = $matches[1];
					return '';
				},
				$selector
			);

			// Extract class names.
			$selector = preg_replace_callback(
				// The `\\\\.` will allow any char via escaping, like the colon in `.lg\:w-full`.
				'/\.((?:[a-zA-Z0-9_-]+|\\\\.)+)/',
				function($matches) use (&$data) {
					$data['classes'][] = stripslashes($matches[1]);
					return '';
				},
				$selector
			);

			// Extract IDs.
			$selector = preg_replace_callback(
				'/#([a-zA-Z0-9_-]+)/',
				function( $matches ) use (&$data) {
					$data['ids'][] = $matches[1];
					return '';
				},
				$selector
			);

			// Extract tag names.
			$selector = preg_replace_callback(
				'/[a-zA-Z0-9_-]+/',
				function($matches) use (&$data) {
					$data['tags'][] = $matches[0];
					return '';
				},
				$selector
			);

			$selectors_data[] = array_filter($data);
		}

		return array_filter($selectors_data);
	}

	public function render_css($data)
	{
		$rendered = [];
		foreach ($data as $item) {

			// Has CSS.
			if (isset($item['css'])) {
				$css = $item['css'];

				// Render only if at least one selector meets the should_include criteria.
				$should_render = !isset($item['selectors']) || 
					0 !== count(
						array_filter(
							$item['selectors'],
							function($selector) {
								return $this->should_include($selector);
							}
						)
					);

				if ($should_render) {
					$rendered[] = $css;
				}

				continue;
			}

			// Nested ruleset.
			if (!empty($item['rulesets'])) {
				$child_rulesets = $this->render_css($item['rulesets']);

				if ($child_rulesets) {
					$rendered[] = sprintf(
						'%s { %s }',
						$item['at_rule'],
						$child_rulesets
					);
				}
			}
		}

		return implode("", $rendered);
	}

	/**
	 * Pre-process allowed selectors.
	 * 
	 * Convert data structures in proper format, mainly for performance.
	 *
	 * @return void
	 */
	protected function process_allowed_selectors()
	{		
		foreach ($this->allow_selectors as $key => $value) {

			// Check if selector rule valid for current sheet.
			if (isset($value['sheet']) && !Util\asset_match($value['sheet'], $this->sheet)) {
				unset($this->allow_selectors[$key]);
				continue;
			}

			$value = $this->add_search_regex($value);
			$regex = $value['search_regex'] ?? '';

			// Pre-compute the matching regex for performance.
			if (isset($value['search'])) {
				$value['search'] = array_filter((array) $value['search']);

				// If we still have something.
				if ($value['search']) {
					$loose_regex = '(' . implode('|', array_map('preg_quote', $value['search'])) . ')(?=\s|\.|\:|,|\[|$)';

					// Combine with search_regex if available.
					$regex = $regex ? "($loose_regex|$regex)" : $loose_regex;
				}
			}

			if ($regex) {
				$value['computed_search_regex'] = $regex;
			}
			
			$this->allow_selectors[$key] = $value;
		}
	}

	/**
	 * Add search regex to array by converting astrisks to proper regex search. 
	 *
	 * @param array $value
	 * @return array
	 */
	protected function add_search_regex(array $value)
	{
		if (isset($value['search_regex'])) {
			return $value;
		}

		if (isset($value['search'])) {
			
			$value['search'] = (array) $value['search'];
			$regex = [];

			foreach ($value['search'] as $key => $search) {
				if (strpos($search, '*') !== false) {
					$search = trim($search);

					// Optimize regex for starting.
					// Note: Ending asterisk removal isn't necessary. PCRE engine is optimized for that.
					$has_first_asterisk = 0;
					$search = preg_replace('/^\*(.+?)/', '\\1', $search, 1, $has_first_asterisk);

					// 1. Space and asterisk matches a class itself, followed by space (child), or comma separator.
					// 2. Only asterisk is considered more of a prefix/suffix and .class* will match .classname too.
					$search = preg_quote($search);
					$search = str_replace(' \*', '(\s|$|,|\:).*?', $search);
					$search = str_replace('\*', '.*?', $search);

					// Note: To prevent ^(.*?) which is slow, we add starting position match only 
					// if the search doesn't start with asterisk match.
					$regex[] = ($has_first_asterisk ? '' : '^') . $search;
					
					unset($value['search'][$key]);
				}
			}

			if ($regex) {
				$value['search_regex'] = '(' . implode('|', $regex) . ')';
			}
		}
		
		return $value;
	}
	

	/**
	 * Whether to include a selector in the output.
	 *
	 * @param array $selector {
	 *     @type string[]|null $classes 
	 *     @type string[]|null $ids
	 *     @type string[]|null $tags
	 * }
	 * @return boolean
	 */
	public function should_include($selector)
	{
		// :root is always valid.
		// Note: Selectors of type `:root .class` will not match this but will be validated below 
		// if .class is used, as intended.
		if ($selector['selector'] === ':root') {
			return true;
		}

		// If it's an attribute selector with nothing else, it should be kept. Perhaps *[attr] or [attr].
		if (!empty($selector['attrs'])
			&& (empty($selector['classes']) && empty($selector['ids']) && empty($selector['tags']))
		) {
			return true;
		}

		// Check allow list.
		// @todo move to cached pre-processed. Clear on settings change.

		// $this->allow_selectors = [
		// 	[
		// 		'type'  => 'any', 
		// 		'search' => '.auth-modal',
		// 	],
		// 	[
		// 		'type'  => 'prefix',
		// 		'class' => 's-dark'
		// 	],
		// 	[
		// 		'type'   => 'class',
		// 		'class'  => 'has-lb',
		// 		'search'  => ['.mfp-']
		// 	],
		// 	[
		// 		'type'   => 'any',
		// 		'class'  => 'has-lb',
		// 		'search'  => ['.mfp-']
		// 	],
		// ];

		if ($this->allow_selectors) {			
			foreach ($this->allow_selectors as $include) {

				/**
				 * Prefix-based + all other classes/tags/etc. in selector exist in doc.
				 * 
				 * Note: It's basically to ignore the first class and include the sub-classes based
				 * on their existence in doc. Example: .scheme-dark or .scheme-light.
				 */ 
				if ($include['type'] === 'prefix') {
					
					// Check if exact match.
					if (('.' . $include['class']) === $selector['selector']) {
						return true;
					}

					// Check if first class matches.
					$has_prefix = $include['class'] === substr($selector['selector'], 1, strlen($include['class']));
					if ($has_prefix) {
						
						// Will check for validity later below. Remove first class as it's allowed.
						if (isset($selector['classes'])) {
							$selector['classes'] = array_diff($selector['classes'], [$include['class']]);
						}

						// WARNING: Due to this break, if there's a rule to allow all selectors of this prefix 
						// that appear later, it won't be validated.
						// @todo Sort prefixes to be at the end or run them later.
						break;
					}

					continue;
				}

				// Check if a class exists in document.
				if ($include['type'] === 'class') {
					if (!$this->is_used($include['class'], 'classes')) {
						continue;
					}
				}

				// Simple search selector string.
				// $search = !empty($include['search']) ? (array) $include['search'] : [];
				
				// Any type, normal selector string match.
				// Note: The regex is equal at n=1 and faster at n>1, surprisingly.
				// if ($search) {
					// foreach ($search as $to_match) {
					// 	if (strpos($selector['selector'], $to_match) !== false) {
					// 		return true;
					// 	}
					// }
				// }

				// Pre-computed regex - combined 'search' and 'search_regex'.
				if (!empty($include['computed_search_regex'])) {
					if (preg_match('#' . $include['computed_search_regex'] . '#', $selector['selector'])) {
						return true;
					}
				}
			}
		}

		$valid = true;
		if (
			// Check if all classes are used.
			(!empty($selector['classes']) && !$this->is_used($selector['classes'], 'classes'))

			// Check if all the ids are used.
			|| (!empty($selector['ids']) && !$this->is_used($selector['ids'], 'ids'))

			// Check for the target tags in used.
			|| (!empty($selector['tags']) && !$this->is_used($selector['tags'], 'tags'))
		) {
			$valid = false;
		}

		return $valid;
	}

	/**
	 * Test if a selector classes, ids, or tags are used in the doc (provided in $this->used_markup).
	 *
	 * @param string|array $targets
	 * @param string $type 'classes', 'tags', or 'ids'.
	 * @return boolean
	 */
	public function is_used($targets, $type = '')
	{
		if (!$type) {
			return false;
		}

		if (!is_array($targets)) {
			$targets = (array) $targets;
		}

		foreach ($targets as $target) {
			// All targets must exist.
			if (!isset($this->used_markup[$type][$target])) {	
				return false;
			}
		}

		return true;
	}

}