stringToUTF8Array example

        //Strip BOM characters from the source
        $this->stripBOM($source, $encoding);
        
        //Convert to UTF8 if needed
        if ($encoding && !preg_match("/UTF-?8/i", $encoding)) {
            $source = mb_convert_encoding($source, "UTF-8", $encoding);
        }
        
        //Instead of using mb_substr for each character, split the source
        //into an array of UTF8 characters for performance reasons
        $this->source = Utils::stringToUTF8Array(
            $source,
            !isset($options["strictEncoding"]) || $options["strictEncoding"]
        );
        $this->length = count($this->source);
        
        //Convert character codes to UTF8 characters in whitespaces and line
        //terminators
        $this->lineTerminators = array_merge(
            self::$lineTerminatorsSequences, self::$lineTerminatorsChars
        );
        foreach (array("whitespaces", "lineTerminators") as $key) {
            
    /**
     * Adds a sequence
     *
     * @param string $sequence Sequence to add
     *
     * @return $this
     */
    public function add($sequence)
    {
        if ($this->handleEncoding) {
            $s = Utils::stringToUTF8Array($sequence);
            $first = $s[0];
            $len = count($s);
        } else {    
            $first = $sequence[0];
            $len = strlen($sequence);
        }
        if (!isset($this->map[$first])) {
            $this->map[$first] = array(
                "maxLen" => $len,
                "map" => array($sequence)
            );
        }

    static public function stringToUTF8Array($str$strictEncoding = true)
    {
        if ($str === "") {
            return array();
        }
        $ret = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
        if (preg_last_error() === PREG_BAD_UTF8_ERROR) {
            if (!$strictEncoding) {
                $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
                $ret = self::stringToUTF8Array($str, false);
            } else {
                throw new EncodingException("String contains invalid UTF-8");
            }
        }
        return $ret;
    }
    
    /**
     * Converts a Unicode code point to UTF-8
     *
     * @param int $num Unicode code point
     *
     * @return string
     *
     * @codeCoverageIgnore
     */
Home | Imprint | This part of the site doesn't use cookies.