php自动获取关键字代码
<?php
// Demo driver: runs the keyword extraction on $tiqustr, which prints its
// intermediate and final result arrays.
$mincipin = 5; // minimum number of occurrences required for a keyword
$minlen = 4; // minimum keyword length (bytes, as measured by strlen/substr)
// The text to analyse was never defined here, which raised an "undefined
// variable" notice and passed null to strlen(). Keep a value supplied by an
// including script if there is one, otherwise fall back to an empty string.
$tiqustr = isset($tiqustr) ? $tiqustr : '';
tiqukeyword($tiqustr, $minlen, $mincipin);
/**
 * Extracts frequently repeated substrings ("keywords") from a text.
 *
 * For every start offset the shortest candidate ($minlen bytes) must occur at
 * least $mincipin times, counted as non-overlapping occurrences from that
 * offset onward. The candidate is then lengthened one byte at a time for as
 * long as its occurrence count does not drop. Afterwards a candidate is
 * discarded when it repeats the text of an earlier candidate that has a higher
 * count, or when its byte range lies inside an earlier candidate's range.
 *
 * All offsets and lengths are byte based (strlen/substr), so multi-byte text
 * can be cut in the middle of a character.
 *
 * Side effect: prints the candidate array, the flagged array and the final
 * array with echo/print_r. The final array is also returned.
 *
 * @param string $tiqustr  Text to scan.
 * @param int    $minlen   Minimum keyword length in bytes; below 1 nothing is extracted.
 * @param int    $mincipin Minimum occurrence count; below 1 nothing is extracted.
 * @return array List of array(start offset, length, keyword, occurrence count, 0).
 */
function tiqukeyword($tiqustr, $minlen, $mincipin) {
    $tiqustr = (string) $tiqustr;
    $minlen = (int) $minlen;
    $mincipin = (int) $mincipin;
    $strlong = strlen($tiqustr);
    $arr = array();

    // A non-positive length would search for an empty needle and a
    // non-positive frequency would divide by zero, so skip the scan entirely.
    if ($minlen >= 1 && $mincipin >= 1) {
        // Last offset at which $mincipin occurrences of $minlen bytes still fit
        // (inclusive, so a text made of exactly $mincipin repetitions is found).
        $lastStart = $strlong - $mincipin * $minlen;
        for ($i = 0; $i <= $lastStart; $i++) {
            // Longest length for which $mincipin non-overlapping occurrences
            // can fit between offset $i and the end of the text.
            $maxLen = (int) floor(($strlong - $i) / $mincipin);
            $maxnum = $mincipin; // count the next candidate has to reach
            $best = null;
            for ($j = $minlen; $j <= $maxLen; $j++) {
                $guanjianzi = substr($tiqustr, $i, $j);
                // Non-overlapping occurrences from $i on; the first one is at $i itself.
                $num = substr_count($tiqustr, $guanjianzi, $i);
                if ($num < $maxnum) {
                    break; // too rare, or lengthening lost occurrences
                }
                $maxnum = $num;
                $best = array($i, $j, $guanjianzi, $num, 0);
            }
            if ($best !== null) {
                $arr[] = $best;
            }
        }
    }
    echo '初步得到的数组:';
    print_r($arr);

    // Flag (index 4 = 1) candidates made redundant by an earlier candidate.
    // Candidates are stored in strictly increasing start order, so a later
    // candidate can never enclose an earlier one; only the later one is ever
    // flagged.
    $arrlong = count($arr);
    for ($i = 0; $i < $arrlong; $i++) {
        if ($arr[$i][4] === 1) {
            continue;
        }
        $bjarr = $arr[$i];
        $qujianjs = $bjarr[0] + $bjarr[1] - 1; // last byte covered by the current candidate
        for ($j = $i + 1; $j < $arrlong; $j++) {
            if ($arr[$j][4] === 1) {
                continue;
            }
            $b = $arr[$j][0] + $arr[$j][1] - 1; // last byte covered by the later candidate
            // Strict comparison: with == numeric-looking keywords such as
            // "01" and " 1" would be treated as the same keyword.
            $duplicate = ($bjarr[2] === $arr[$j][2]) && ($bjarr[3] > $arr[$j][3]);
            // The later candidate starts after the current one, so it is
            // enclosed as soon as it does not end beyond it.
            $enclosed = ($b <= $qujianjs);
            if ($duplicate || $enclosed) {
                $arr[$j][4] = 1;
            }
        }
    }
    echo '<br/><br/><br/><br/>重叠加标记后的数组:';
    print_r($arr);

    // Open-source code from phprm.com
    // Keep only the candidates that were not flagged.
    $jieguoarr = array();
    foreach ($arr as $item) {
        if ($item[4] === 0) {
            $jieguoarr[] = $item;
        }
    }
    echo '<br/><br/><br/><br/>';
    echo '最后得到的数组:';
    print_r($jieguoarr);

    return $jieguoarr;
}
// Source article: http://www.phprm.com/develop/fs5052.html
// 随便收藏,请保留本文地址!