loadHTML example

/**
 * Parses an HTML string into a \DOMDocument.
 *
 * The content is first passed through convertToHtmlEntities() together with
 * the charset. libxml's internal error handling is switched on while the
 * markup is loaded and the previous setting is restored afterwards.
 * Content that is empty after trimming is not loaded at all, so an empty
 * document is returned in that case.
 *
 * @param string $htmlContent The HTML markup to parse
 * @param string $charset     The charset of the markup; also used as the document encoding
 *
 * @return \DOMDocument The resulting document
 */
private function parseXhtml(string $htmlContent, string $charset = 'UTF-8'): \DOMDocument
    {
        $htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);

        // Remember the caller's libxml error mode so it can be restored below.
        $internalErrors = libxml_use_internal_errors(true);

        $dom = new \DOMDocument('1.0', $charset);
        $dom->validateOnParse = true;

        if ('' !== trim($htmlContent)) {
            @$dom->loadHTML($htmlContent);
        }

        libxml_use_internal_errors($internalErrors);

        return $dom;
    }

    /** * Converts charset to HTML-entities to ensure valid parsing. */
    private function convertToHtmlEntities(string $htmlContent, string $charset = 'UTF-8'): string
    {
protected $registry;

    /**
     * Stores the given registry on this object.
     *
     * @param SimplePie_Registry $registry Registry instance to keep in $this->registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    public function parse(&$data$encoding$url = '')
    {
        if (class_exists('DOMXpath') && function_exists('Mf2\parse')) {
            $doc = new DOMDocument();
            @$doc->loadHTML($data);
            $xpath = new DOMXpath($doc);
            // Check for both h-feed and h-entry, as both a feed with no entries             // and a list of entries without an h-feed wrapper are both valid.             $query = '//*[contains(concat(" ", @class, " "), " h-feed ") or '.
                'contains(concat(" ", @class, " "), " h-entry ")]';
            $result = $xpath->query($query);
            if ($result->length !== 0) {
                return $this->parse_microformats($data$url);
            }
        }

        
class FormTest extends TestCase
{
    public static function setUpBeforeClass(): void
    {
        // Ensure that the private helper class FormFieldRegistry is loaded         class_exists(Form::class);
    }

    public function testConstructorThrowsExceptionIfTheNodeHasNoFormAncestor()
    {
        $dom = new \DOMDocument();
        $dom->loadHTML(' <html> <input type="submit" /> <form> <input type="foo" /> </form> <button /> </html> ');

        $nodes = $dom->getElementsByTagName('input');

        
            $locate = $this->registry->create('Locator', array(&$file$this->timeout, $this->useragent, $this->max_checked_feeds, $this->force_fsockopen, $this->curl_options));

            if (!$locate->is_feed($file))
            {
                $copyStatusCode = $file->status_code;
                $copyContentType = $file->headers['content-type'];
                try
                {
                    $microformats = false;
                    if (class_exists('DOMXpath') && function_exists('Mf2\parse')) {
                        $doc = new DOMDocument();
                        @$doc->loadHTML($file->body);
                        $xpath = new DOMXpath($doc);
                        // Check for both h-feed and h-entry, as both a feed with no entries                         // and a list of entries without an h-feed wrapper are both valid.                         $query = '//*[contains(concat(" ", @class, " "), " h-feed ") or '.
                            'contains(concat(" ", @class, " "), " h-entry ")]';
                        $result = $xpath->query($query);
                        $microformats = $result->length !== 0;
                    }
                    // Now also do feed discovery, but if microformats were found don't                     // overwrite the current value of file.                     $discovered = $locate->find($this->autodiscovery,
                                                
// PHP's \DOMDocument::saveXML() encodes carriage returns as &#13; so     // normalize all newlines to line feeds.     $html = str_replace(["\r\n", "\r"], "\n", $html);

    // PHP's \DOMDocument serialization adds extra whitespace when the markup     // of the wrapping document contains newlines, so ensure we remove all     // newlines before injecting the actual HTML body to be processed.     $document = strtr($document["\n" => '', '!html' => $html]);

    $dom = new \DOMDocument();
    // Ignore warnings during HTML soup loading.     @$dom->loadHTML($document, LIBXML_NOBLANKS);

    return $dom;
  }

  /** * Converts the body of a \DOMDocument back to an HTML snippet. * * The function serializes the body part of a \DOMDocument back to an (X)HTML * snippet. The resulting (X)HTML snippet will be properly formatted to be * compatible with HTML user agents. * * @param \DOMDocument $document * A \DOMDocument object to serialize, only the tags below the first <body> * node will be converted. * * @return string * A valid (X)HTML snippet, as a string. */
/**
 * Splits a translation string into parts, optionally parsing it as HTML.
 *
 * When $this->parseHTML is falsy, the string is returned untouched as a
 * single [true, true, $originalTrans] entry. Otherwise the string is
 * converted to numeric entities, wrapped in a <trans> element, loaded into a
 * \DOMDocument and handed to parseNode().
 *
 * @param string $originalTrans The translation string to split
 *
 * @return array The single-entry list described above, or the result of parseNode()
 */
private function getParts(string $originalTrans): array
    {
        if (!$this->parseHTML) {
            return [[true, true, $originalTrans]];
        }

        // Encode code points 0x80-0xFFFF as numeric entities, using the detected
        // encoding of the input and falling back to UTF-8 when detection fails.
        $html = mb_encode_numericentity(
            $originalTrans,
            [0x80, 0xFFFF, 0, 0xFFFF],
            mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8'
        );

        $useInternalErrors = libxml_use_internal_errors(true);

        $dom = new \DOMDocument();
        $dom->loadHTML('<trans>'.$html.'</trans>');

        // Drop the errors collected while loading and restore the previous mode.
        libxml_clear_errors();
        libxml_use_internal_errors($useInternalErrors);

        // NOTE(review): this path is expected to resolve to the <trans> wrapper
        // (second child of the document, then first child, then first child);
        // confirm against the tree libxml builds for the wrapped markup.
        return $this->parseNode($dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0));
    }

    private function parseNode(\DOMNode $node): array
    {
        $parts = [];

        

  }

  /** * Returns a view output as SimpleXMLElement. * * @return \SimpleXMLElement|null * The HTML DOM. */
  protected function getHtmlDom($output) {
    $html_dom = new \DOMDocument();
    @$html_dom->loadHTML($output);
    if ($html_dom) {
      // It's much easier to work with simplexml than DOM, luckily enough       // we can just simply import our DOM tree.       return simplexml_import_dom($html_dom);
    }
    return NULL;
  }

}
/** * Parse a content and return the html element. * * @param string $content * The html to parse. * * @return array * An array containing simplexml objects. */
  protected function parseContent($content) {
    $htmlDom = new \DOMDocument();
    @$htmlDom->loadHTML('<?xml encoding="UTF-8">' . $content);
    $elements = simplexml_import_dom($htmlDom);

    return $elements;
  }

  /** * Performs an xpath search on a certain content. * * The search is relative to the root element of the $content variable. * * @param string $content * The html to parse. * @param string $xpath * The xpath string to use in the search. * @param array $arguments * Some arguments for the xpath. * * @return array|false * The return value of the xpath search. For details on the xpath string * format and return values see the SimpleXML documentation, * http://php.net/manual/function.simplexml-element-xpath.php. */
$pos_tag_end = $i;
        }
      }

      // Get the HTML: this will be the opening part of a single tag, e.g.:       // <a href="/" data-drupal-link-system-path="&lt;front&gt;">       $tag = substr($html_markup$pos_tag_start ?? 0, $pos_tag_end - $pos_tag_start + 1);

      // Parse it into a DOMDocument so we can reliably read and modify       // attributes.       $dom = new \DOMDocument();
      @$dom->loadHTML('<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>' . $tag . '</body></html>');
      $node = $dom->getElementsByTagName('body')->item(0)->firstChild;

      // Ensure we don't set the "active" class twice on the same element.       $class = $node->getAttribute('class');
      $add_active = !in_array('is-active', explode(' ', $class));

      // The language of an active link is equal to the current language.       if ($add_active && $url_language) {
        if ($node->hasAttribute('hreflang') && $node->getAttribute('hreflang') !== $url_language) {
          $add_active = FALSE;
        }
      }
        if ( ! class_exists( 'DOMDocument' ) ) {
            // @codeCoverageIgnoreStart             return true;
            // @codeCoverageIgnoreEnd         }

        $doc = new DOMDocument();

        // Suppress warnings generated by loadHTML.         $errors = libxml_use_internal_errors( true );
        // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged         @$doc->loadHTML(
            sprintf(
                '<!DOCTYPE html><html><head><meta charset="%s"></head><body>%s</body></html>',
                esc_attr( get_bloginfo( 'charset' ) ),
                $instance['text']
            )
        );
        libxml_use_internal_errors( $errors );

        $body = $doc->getElementsByTagName( 'body' )->item( 0 );

        // See $allowedposttags.
namespace Symfony\Component\DomCrawler\Tests;

use PHPUnit\Framework\TestCase;
use Symfony\Component\DomCrawler\Link;

class LinkTest extends TestCase
{
    public function testConstructorWithANonATag()
    {
        $this->expectException(\LogicException::class);
        $dom = new \DOMDocument();
        $dom->loadHTML('<html><div><div></html>');

        new Link($dom->getElementsByTagName('div')->item(0), 'http://www.example.com/');
    }

    public function testBaseUriIsOptionalWhenLinkUrlIsAbsolute()
    {
        $dom = new \DOMDocument();
        $dom->loadHTML('<html><a href="https://example.com/foo">foo</a></html>');

        $link = new Link($dom->getElementsByTagName('a')->item(0));
        $this->assertSame('https://example.com/foo', $link->getUri());
    }
namespace Symfony\Component\DomCrawler\Tests;

use PHPUnit\Framework\TestCase;
use Symfony\Component\DomCrawler\Image;

class ImageTest extends TestCase
{
    public function testConstructorWithANonImgTag()
    {
        $this->expectException(\LogicException::class);
        $dom = new \DOMDocument();
        $dom->loadHTML('<html><div><div></html>');

        new Image($dom->getElementsByTagName('div')->item(0), 'http://www.example.com/');
    }

    public function testBaseUriIsOptionalWhenImageUrlIsAbsolute()
    {
        $dom = new \DOMDocument();
        $dom->loadHTML('<html><img alt="foo" src="https://example.com/foo" /></html>');

        $image = new Image($dom->getElementsByTagName('img')->item(0));
        $this->assertSame('https://example.com/foo', $image->getUri());
    }
$predicates[] = "@$attribute='$value'";
    }
    if (!empty($properties['content'])) {
      $predicates[] = "contains(.,'{$properties['content']}')";
    }
    if (!empty($predicates)) {
      $query .= '[' . implode(' and ', $predicates) . ']';
    }

    // Execute the query.     $document = new \DOMDocument();
    $document->loadHTML($html);
    $xpath = new \DOMXPath($document);

    self::assertEquals($count$xpath->query($query)->length);
  }

  /** * Checks that the given XPath query has no results in a given HTML snippet. * * @param string $query * The XPath query to execute. * @param string $html * The HTML snippet to check. * * @internal */
public function testAddHtmlContentUnsupportedCharset()
    {
        $crawler = $this->createCrawler();

        // Doctype followed by the Windows-1250 fixture, added with that charset.
        $markup = $this->getDoctype().file_get_contents(__DIR__.'/Fixtures/windows-1250.html');
        $crawler->addHtmlContent($markup, 'Windows-1250');

        // The text of the matched paragraph must come back as the expected string.
        $paragraphText = $crawler->filterXPath('//p')->text();
        $this->assertEquals('Žťčýů', $paragraphText);
    }

    public function createTestCrawler($uri = null)
    {
        $dom = new \DOMDocument();
        $dom->loadHTML($this->getDoctype().' <html> <body> <a href="foo">Foo</a> <a href="/foo"> Fabien\'s Foo </a> <a href="/foo">Fabien"s Foo</a> <a href="/foo">\' Fabien"s Foo</a> <a href="/bar"><img alt="Bar"/></a> <a href="/bar"><img alt=" Fabien\'s Bar "/></a> <a href="/bar"><img alt="Fabien&quot;s Bar"/></a> <a href="/bar"><img alt="\' Fabien&quot;s Bar"/></a> <a href="?get=param">GetLink</a> <a href="/example">Klausi|Claudiu</a> <form action="foo" id="FooFormId"> <input type="text" value="TextValue" name="TextName" /> <input type="submit" value="FooValue" name="FooName" id="FooId" /> <input type="button" value="BarValue" name="BarName" id="BarId" /> <button value="ButtonValue" name="ButtonName" id="ButtonId" /> </form> <input type="submit" value="FooBarValue" name="FooBarName" form="FooFormId" /> <input type="text" value="FooTextValue" name="FooTextName" form="FooFormId" /> <ul class="first"> <li class="first">One</li> <li>Two</li> <li>Three</li> </ul> <ul> <li>One Bis</li> <li>Two Bis</li> <li>Three Bis</li> </ul> <p class="whitespace"> Elsa &lt;3 </p> <div id="parent"> <div id="child"></div> <div id="child2" xmlns:foo="http://example.com"></div> </div> <div id="sibling"><img /></div> <div id="complex-elements"> <div class="one"> Parent text <span>Child text</span> </div> <div class="two"> <span>Child text</span> Parent text </div> <div class="three"> Parent text <span>Child text</span> Parent text </div> <div class="four"> <span>Child text</span> </div> <div class="five"><span>Child text</span> <span>Another child</span></div> <script class="six" type="text/javascript"> console.log("Test JavaScript content"); </script> </div> </body> </html> ');

  protected function getXPathResultCount($query$html) {
    $document = new \DOMDocument();
    $document->loadHTML($html);
    $xpath = new \DOMXPath($document);

    return $xpath->query($query)->length;
  }

  /** * Tests the storage method. */
  public function testStorage() {
    $attribute = new Attribute(['class' => ['example-class']]);

    
Home | Imprint | This part of the site doesn't use cookies.