This article shows several ways to truncate (cut) strings that contain Chinese characters in PHP. The examples are shared below for your reference:
<?php
/*
A UTF-8 encoded character consists of 1 to 4 bytes (most Chinese characters take 3 bytes).
*/
/*--------------------------Method 1: Chinese string truncation------------------------------*/
/**
 * Cut a byte range out of a double-byte encoded string (GBK/GB2312 style,
 * where a byte above 0xA0 starts a two-byte character) without ever
 * splitting a two-byte character in half.
 *
 * The string is always scanned from byte 0 so that character boundaries are
 * known; characters that begin before $start are skipped.
 *
 * @param string $str   Input string in a double-byte encoding.
 * @param int    $start Byte offset at which the result starts. A two-byte
 *                      character that begins before $start is skipped whole.
 * @param int    $len   Byte budget. A two-byte character that begins inside
 *                      the budget is kept whole, so the result can be one
 *                      byte longer than $len.
 * @return string The extracted substring.
 */
function msubstr($str, $start, $len)
{
    $str = (string) $str;
    $tmpstr = "";
    // Never scan past the end of the input.
    $end = min($start + $len, strlen($str));
    for ($i = 0; $i < $end; $i++) {
        // A byte above 0xA0 is the lead byte of a two-byte character.
        $width = ord(substr($str, $i, 1)) > 0xa0 ? 2 : 1;
        if ($i >= $start) {
            $tmpstr .= substr($str, $i, $width);
        }
        // Skip the trail byte; the loop's own $i++ consumes the lead byte.
        $i += $width - 1;
    }
    return $tmpstr;
}
/*----------------------------The second method-----------------------------------*/
// Truncate a UTF-8 string
/**
 * Truncate a UTF-8 string to a display width.
 *
 * Width is counted in "columns": a single-byte (ASCII) character is one
 * column and any multi-byte character is two columns. A multi-byte character
 * that does not fully fit in the remaining width is dropped and truncation
 * stops there.
 *
 * @param string $str UTF-8 encoded input.
 * @param int    $len Maximum width in columns.
 * @return string The leading part of $str that fits in $len columns.
 */
function utf_substr($str, $len)
{
    $str = (string) $str;
    $total = strlen($str);
    $pos = 0;
    $out = '';
    for ($i = 0; $i < $len && $pos < $total; $i++) {
        $lead = ord($str[$pos]);
        if ($lead > 127) {
            // A multi-byte character takes a second column.
            $i++;
            if ($i < $len) {
                // Sequence length comes from the lead byte; a stray
                // continuation byte (0x80-0xBF) is copied as a single byte.
                if ($lead >= 0xF0) {
                    $bytes = 4;
                } elseif ($lead >= 0xE0) {
                    $bytes = 3;
                } elseif ($lead >= 0xC0) {
                    $bytes = 2;
                } else {
                    $bytes = 1;
                }
                $out .= substr($str, $pos, $bytes);
                $pos += $bytes;
            }
        } else {
            $out .= $str[$pos];
            $pos++;
        }
    }
    return $out;
}
/*-------------------------------------The third method (UTF-8)--------------------------------*/
/**
 * Shorten a UTF-8 string for display, appending "..." when it is cut.
 *
 * Width is counted with ASCII characters as 1 and multi-byte characters as 2.
 * A string whose total width is at most $length is returned without an
 * ellipsis. Otherwise characters are kept until the running width first
 * exceeds $length - 3, and "..." is appended; because the character that
 * crosses that threshold is kept, the result can be slightly wider than
 * $length.
 *
 * Bytes that are not part of a well-formed UTF-8 sequence (and NUL bytes) are
 * not matched by the pattern and are therefore dropped from the result.
 *
 * @param string $string UTF-8 encoded input.
 * @param int    $length Target display width, including the ellipsis.
 * @return string The original characters, or a shortened prefix plus "...".
 */
function cutstr($string, $length)
{
    // One match per UTF-8 character. The two-byte alternative must be a lead
    // byte followed by a continuation byte, otherwise such characters would
    // be split into separate bytes.
    preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", (string) $string, $info);
    $chars = $info[0];

    // If everything fits there is nothing to cut and no ellipsis is needed.
    $total = 0;
    foreach ($chars as $char) {
        $total += ord($char) > 127 ? 2 : 1;
    }
    if ($total <= $length) {
        return join('', $chars);
    }

    $wordscut = "";
    $j = 0;
    foreach ($chars as $char) {
        $wordscut .= $char;
        $j += ord($char) > 127 ? 2 : 1;
        if ($j > $length - 3) {
            return $wordscut . "...";
        }
    }
    return $wordscut;
}
// Demo for the third method: shorten a sample sentence to a width of 10 and print it.
$string = '312 ha ha, this combination is hard to cut';
print cutstr($string, 10);
/*---------------------------------Here's what we've used to intercept the third string------------------------------*/
// $name1 = mysql_result($my_rst,0,"name");
// $name = preg_match("/([1-9][0-9]+)/",$name1,$r);
// $name = $r[0];
// if($name == ""){
// $name=preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,2}'.
// '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,1}).*#s',
// '$1',$name1);
// }
/*--------------------------------------------The fourth method (UTF-8)---------------------------------------------*/
/**
 * Truncate a UTF-8 string to a visual length and append "..." if anything
 * was cut off.
 *
 * Visual length is counted as follows: a multi-byte character counts as 1,
 * an uppercase ASCII letter (A-Z) counts as 1, and every other single byte
 * (lowercase letters, digits, half-width punctuation) counts as 0.5.
 *
 * @param string    $sourcestr UTF-8 encoded input.
 * @param int|float $cutlength Maximum visual length to keep.
 * @return string The kept prefix, followed by "..." when the input was longer.
 */
function cut_str($sourcestr, $cutlength)
{
    $sourcestr = (string) $sourcestr;
    $returnstr = '';
    $i = 0; // byte offset into $sourcestr
    $n = 0; // visual length accumulated so far
    $str_length = strlen($sourcestr); // number of bytes in the string

    while ($n < $cutlength && $i < $str_length) {
        $ascnum = ord($sourcestr[$i]);
        if ($ascnum >= 240) {
            // Lead byte of a 4-byte sequence.
            $bytes = 4;
            $width = 1;
        } elseif ($ascnum >= 224) {
            // Lead byte of a 3-byte sequence.
            $bytes = 3;
            $width = 1;
        } elseif ($ascnum >= 192) {
            // Lead byte of a 2-byte sequence.
            $bytes = 2;
            $width = 1;
        } elseif ($ascnum >= 65 && $ascnum <= 90) {
            // Uppercase letters are visually wide, so they count as a full unit.
            $bytes = 1;
            $width = 1;
        } else {
            // Lowercase letters, digits and half-width punctuation count as half.
            $bytes = 1;
            $width = 0.5;
        }
        $returnstr .= substr($sourcestr, $i, $bytes);
        $i += $bytes;
        $n += $width;
    }

    // Only mark the result as truncated when input bytes were left over.
    if ($i < $str_length) {
        $returnstr .= "...";
    }
    return $returnstr;
}
/*--------------------The fifth method (UTF-8)---------------------------------------------*/
/**
 * Classify the display token that starts at byte offset $i of $title.
 *
 * A token is one of: a named HTML entity (&lt; &gt; &amp; &quot;), a decimal
 * numeric entity (&#NNN; with optional semicolon), a multi-byte character
 * identified by its lead byte, a stray continuation byte, or a single ASCII
 * byte.
 *
 * @param string $title Input string.
 * @param int    $i     Byte offset of the token.
 * @return array Four values, in order:
 *               - int  byte length of the token,
 *               - bool whether the token counts as a character,
 *               - bool whether it is "wide" (counts double in magic mode),
 *               - bool whether it is removed again when it overshoots the
 *                 requested width in magic mode.
 */
function FSubstrToken($title, $i)
{
    $cur = substr($title, $i, 1);

    if ($cur === "&") {
        // Named entities are kept whole and count as one narrow character.
        foreach (array("&lt;", "&gt;", "&amp;", "&quot;") as $entity) {
            if (substr($title, $i, strlen($entity)) === $entity) {
                return array(strlen($entity), true, false, false);
            }
        }
        // Decimal numeric entities count as one wide character. The pattern
        // is anchored so only an entity starting exactly at $i is accepted.
        if (preg_match('/^&#\d+;?/', substr($title, $i), $match)) {
            return array(strlen($match[0]), true, true, true);
        }
        // A bare ampersand is an ordinary narrow character.
        return array(1, true, false, false);
    }

    $byte = ord($cur);
    if ($byte >= 192) {
        // Lead byte of a multi-byte sequence; its value gives the length.
        if ($byte >= 252) {
            $bytes = 6;
        } elseif ($byte >= 248) {
            $bytes = 5;
        } elseif ($byte >= 240) {
            $bytes = 4;
        } elseif ($byte >= 224) {
            $bytes = 3;
        } else {
            $bytes = 2;
        }
        return array($bytes, true, true, true);
    }
    if ($byte >= 128) {
        // Stray continuation byte: copied through but not counted.
        return array(1, false, false, false);
    }
    // ASCII: uppercase A-Z is treated as wide, everything else as narrow.
    $isUpper = $byte >= 65 && $byte <= 90;
    return array(1, true, $isUpper, false);
}

/**
 * Extract a substring from a UTF-8 string that may contain HTML entities,
 * without splitting a multi-byte character or an entity.
 *
 * In magic mode (the default) $len is a width measured in wide characters:
 * multi-byte characters, numeric entities and uppercase ASCII letters are
 * wide (1 unit), and all other ASCII characters and named entities are
 * narrow (half a unit). In non-magic mode $len is a plain character count in
 * which every character or entity counts as one.
 *
 * @param string     $title Input string.
 * @param int        $start Byte offset to start at. If it points into the
 *                          middle of a multi-byte character it is moved back
 *                          to that character's lead byte.
 * @param int|string $len   Length as described above; "" (the default) means
 *                          the byte length of $title.
 * @param bool       $magic Whether to use width-based counting.
 * @return string The extracted substring.
 */
function FSubstr($title, $start, $len = "", $magic = true)
{
    $title = (string) $title;
    $total = strlen($title);
    if ($len === "" || $len === null) {
        $len = $total;
    }

    // If $start lands on a continuation byte, walk back to the lead byte.
    if ($start != 0) {
        $startv = ord(substr($title, $start, 1));
        if ($startv >= 128 && $startv < 192) {
            for ($i = $start - 1; $i > 0; $i--) {
                if (ord(substr($title, $i, 1)) >= 192) {
                    break;
                }
            }
            $start = $i;
        }
    }

    // The string has no more bytes than $len, so no token walk is needed.
    if ($total <= $len) {
        return substr($title, $start, $len);
    }

    $alen = 0;    // narrow characters seen (magic mode)
    $blen = 0;    // wide characters seen (magic mode)
    $realnum = 0; // characters seen
    $length = 0;  // bytes to return

    $i = $start;
    while ($i < $total) {
        list($cstep, $counted, $wide, $droppable) = FSubstrToken($title, $i);
        $length += $cstep;
        $i += $cstep;
        if ($counted) {
            $realnum++;
        }

        if ($magic) {
            if ($counted) {
                if ($wide) {
                    $blen++;
                } else {
                    $alen++;
                }
            }
            // Widths are compared in half units: wide = 2, narrow = 1.
            $width = $blen * 2 + $alen;
            if ($width == $len * 2) {
                break;
            }
            if ($width == $len * 2 + 1) {
                // The last token overshot by half a unit; a multi-byte
                // character or numeric entity is removed again.
                if ($droppable) {
                    $length -= $cstep;
                }
                break;
            }
        } elseif ($realnum == $len) {
            break;
        }
    }

    return substr($title, $start, $length);
}
/**
 * Return up to $len characters of a UTF-8 string, starting after the first
 * $from characters.
 *
 * A character is either one ASCII byte or a lead byte (0xC0-0xFF) followed by
 * its continuation bytes (0x80-0xBF). Characters are read consecutively from
 * the start of the string; reading stops at the first byte that does not
 * begin such a character.
 *
 * The characters are collected with a fixed pattern and sliced in PHP, rather
 * than building "{0,N}" quantifiers from the arguments, so arbitrary argument
 * values cannot alter the pattern and counts above PCRE's quantifier limit
 * still work.
 *
 * @param string $str  UTF-8 encoded input.
 * @param int    $from Number of leading characters to skip (negative values
 *                     are treated as 0).
 * @param int    $len  Maximum number of characters to return (negative values
 *                     are treated as 0).
 * @return string The selected characters, or "" when there are none.
 */
function utf8Substr($str, $from, $len)
{
    $from = max(0, (int) $from);
    $len = max(0, (int) $len);
    if ($len === 0) {
        return '';
    }

    // \G forces each match to begin where the previous one ended, so the
    // scan stops at the first byte that does not start a character.
    preg_match_all('#\G(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)#', (string) $str, $matches);
    $chars = isset($matches[0]) ? $matches[0] : array();

    return implode('', array_slice($chars, $from, $len));
}
// Demo: keep the first 15 characters of a sample sentence and print them.
$title = utf8Substr("you're Harper niad1 NASDA wop asdni, you love nobody but Aston Satan 12ccs SD", 0, 15);
print $title;
?>
Readers who want more PHP-related content can see our special topics: "Summary of PHP Encoding and Transcoding Techniques", "PHP Array Operation Techniques", "Summary of PHP String Usage", "Summary of Common PHP Functions and Techniques", and "Summary of PHP Error and Exception Handling".
I hope this article is helpful to you in your PHP programming.