CodeExplorer

analyze example

    // their own lines). So the even-numbered lines should simplify to nothing,     // and the odd-numbered lines we need to split into shorter chunks and     // verify that text processing doesn't lose any characters.     $input = file_get_contents($this->root . '/core/modules/search/tests/UnicodeTest.txt');
    $basestrings = explode(chr(10), $input);
    $strings = [];
    $text_processor = \Drupal::service('search.text_processor');
    assert($text_processor instanceof SearchTextProcessorInterface);
    foreach ($basestrings as $key => $string) {
      if ($key % 2) {
        // Even line - should simplify down to a space.         $simplified = $text_processor->analyze($string);
        $this->assertSame(' ', $simplified, "Line {$key} is excluded from the index");
      }
      else {
        // Odd line, should be word characters.         // Split this into 30-character chunks, so we don't run into limits of         // truncation in         // \Drupal\search\SearchTextProcessorInterface::analyze().         $start = 0;
        while ($start < mb_strlen($string)) {
          $newstr = mb_substr($string, $start, 30);
          // Special case: leading zeros are removed from numeric strings,

// Strip off phrase quotes.       $phrase = FALSE;
      if ($match[2][0] == '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
        $this->simple = FALSE;
      }

      // Simplify keyword according to indexing rules and external       // preprocessors. Use same process as during search indexing, so it       // will match search index.       $words = $text_processor->analyze($match[2]);
      // Re-explode in case simplification added more words, except when       // matching a phrase.       $words = $phrase ? [$words] : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
      // Negative matches.       if ($match[1] == '-') {
        $this->keys['negative'] = array_merge($this->keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all       // ORed keywords.       elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
        $last = array_pop($this->keys['positive']);

'field5' => false,
            'field6' => 10.99999,
            'nestedField' => [
                'value' => 'nested',
                'second' => 'ignored',
            ],
            'ignored' => 'ignored',
        ]);

        $analyzer = $this->getContainer()->get(ProductSearchKeywordAnalyzer::class);

        $result = $analyzer->analyze($product, Context::createDefaultContext(), $config);

        $words = $result->map(fn (AnalyzedKeyword $keyword) => $keyword->getKeyword());

        static::assertEquals(
            ['searchable', 'match', 'array', '10000000', '10.99999', 'nested'],
            array_values($words)
        );
    }

    /**
     * @dataProvider casesSearchBaseOnConfigField
     */

define( 'GETID3_TEMP_DIR', get_temp_dir() );
    }

    if ( ! class_exists( 'getID3', false ) ) {
        require ABSPATH . WPINC . '/ID3/getid3.php';
    }

    $id3 = new getID3();
    // Required to get the `created_timestamp` value.     $id3->options_audiovideo_quicktime_ReturnAtomData = true; // phpcs:ignore WordPress.NamingConventions.ValidVariableName
    $data = $id3->analyze( $file );

    if ( isset( $data['video']['lossless'] ) ) {
        $metadata['lossless'] = $data['video']['lossless'];
    }

    if ( ! empty( $data['video']['bitrate'] ) ) {
        $metadata['bitrate'] = (int) $data['video']['bitrate'];
    }

    if ( ! empty( $data['video']['bitrate_mode'] ) ) {
        $metadata['bitrate_mode'] = $data['video']['bitrate_mode'];
    }

$type = $input->getArgument('type');
        $query = $input->getArgument('query');
        $analyzer = $input->getArgument('analyzer');

        $shop = $this->container->get(ShopGatewayInterface::class)->get($shopId);
        if (!$shop instanceof ShopStruct) {
            throw new StructNotFoundException(ShopStruct::class, $shopId);
        }
        $client = $this->container->get(Client::class);
        $index = $this->container->get(IndexFactory::class)->createShopIndex($shop, $type);

        $analyzed = $client->indices()->analyze([
            'index' => $index->getName(),
            'analyzer' => $analyzer,
            'text' => $query,
        ]);

        $tokens = $analyzed['tokens'];

        $table = new Table($output);
        $table->setHeaders(['Token', 'Start', 'End', 'Type', 'position'])
            ->setRows($tokens)
            ->render();

  public function __construct(TransliterationInterface $transliteration, ConfigFactoryInterface $config_factory, ModuleHandlerInterface $module_handler) {
    // Keep a reference to every injected service on the instance. The three
    // assignments are independent of each other.
    $this->moduleHandler = $module_handler;
    $this->configFactory = $config_factory;
    $this->transliteration = $transliteration;
  }

  /**
   * {@inheritdoc}
   */
  public function process(string $text, ?string $langcode = NULL): array {
    // Run the text through analyze() first, then split the resulting string
    // on single spaces to obtain the list of words.
    return explode(' ', $this->analyze($text, $langcode));
  }

  /**
   * {@inheritdoc}
   */
  public function analyze(string $text, ?string $langcode = NULL): string {
    // Decode entities to UTF-8.     $text = Html::decodeEntities($text);

    // Lowercase.

    public function testAnalyze(array $productData, array $configFields, array $expected): void
    {
        // Token filter stub that hands back the tokens it receives unchanged.
        $passThroughFilter = $this->createMock(TokenFilter::class);
        $passThroughFilter->method('filter')->willReturnCallback(static fn (array $tokens) => $tokens);

        // Build the product under test from the supplied data set.
        $productEntity = new ProductEntity();
        $productEntity->assign($productData);

        $keywordAnalyzer = new ProductSearchKeywordAnalyzer(new Tokenizer(3), $passThroughFilter);
        $actualKeys = $keywordAnalyzer
            ->analyze($productEntity, $this->context, $configFields)
            ->getKeys();

        // Both lists are sorted before comparing, so the order in which the
        // analyzer emits keys does not affect the assertion.
        sort($expected);
        sort($actualKeys);

        static::assertEquals($expected, $actualKeys);
    }

    /**
     * The old implementation relied on the error_reporting level, to also report notices as errors.
     * This test ensures that the new implementation does not rely on the error_reporting level.
     *
     * @dataProvider analyzeCases
     *
     * @param array<string, mixed> $productData
     * @param array<int, array{field: string, tokenize: bool, ranking: int}> $configFields
     * @param array<int, string> $expected
     */

$term = $input->getArgument('term');

        $iteration = $this->getAnalyzers();

        $rows = [];
        foreach ($iteration as $headline => $analyzers) {
            $rows[] = [$headline];
            $rows[] = ['###############'];
            foreach ($analyzers as $analyzer) {
                /** @var array{'tokens': array{token: string}[]} $analyzed */
                $analyzed = $this->client->indices()->analyze([
                    'body' => [
                        'analyzer' => $analyzer,
                        'text' => $term,
                    ],
                ]);

                $rows[] = [
                    'Analyzer' => $analyzer,
                    'Tokens' => implode(' ', array_column($analyzed['tokens'], 'token')),
                ];
            }

$iterator = $this->getIterator($ids, $context, $configFields);

        while ($products = $iterator->fetch()) {
            foreach ($products->getEntities() as $product) {
                // overwrite fetched products if translations for that product exists                 // otherwise we use the already fetched product from the parent language                 $existingProducts[$product->getId()] = $product;
            }
        }

        foreach ($existingProducts as $product) {
            $analyzed = $this->analyzer->analyze($product, $context, $configFields);

            $productId = Uuid::fromHexToBytes($product->getId());

            foreach ($analyzed as $keyword) {
                $keywords[] = [
                    'id' => Uuid::randomBytes(),
                    'version_id' => $versionId,
                    'product_version_id' => $versionId,
                    'language_id' => $languageId,
                    'product_id' => $productId,
                    'keyword' => $keyword->getKeyword(),

foreach ($starts as $key => $value) {
      $chars[] = $this->code2utf($starts[$key]);
      $mid = round(0.5 * ($starts[$key] + $ends[$key]));
      $chars[] = $this->code2utf($mid);
      $chars[] = $this->code2utf($ends[$key]);
    }

    // Merge into a string and tokenize.     $string = implode('', $chars);
    $text_processor = \Drupal::service('search.text_processor');
    assert($text_processor instanceof SearchTextProcessorInterface);
    $out = trim($text_processor->analyze($string));
    $expected = mb_strtolower(implode(' ', $chars));

    // Verify that the output matches what we expect.     $this->assertEquals($expected, $out, 'CJK tokenizer worked on all supplied CJK characters');
  }

  /**
   * Verifies that strings of non-CJK characters are not tokenized.
   *
   * This is just a sanity check - it verifies that strings of letters are
   * not tokenized.
   */