public function analyze(string
$text, ?string
$langcode = NULL
): string
{ // Decode entities to UTF-8.
$text = Html::
decodeEntities($text);
// Lowercase.
$text =
mb_strtolower($text);
// Remove diacritics.
$text =
$this->transliteration->
removeDiacritics($text);
// Call an external processor for word handling.
$this->
invokePreprocess($text,
$langcode);
// Simple CJK handling.
if ($this->configFactory->
get('search.settings'
)->
get('index.overlap_cjk'
)) { $text =
preg_replace_callback('/[' . self::PREG_CLASS_CJK . ']+/u',
[$this, 'expandCjk'
],
$text);
} // To improve searching for numerical data such as dates, IP addresses
// or version numbers, we consider a group of numerical characters
// separated only by punctuation characters to be one piece.
// As a result, searching for e.g. '20/03/1984' also returns results that
// contain '20-03-1984'.