PHP获取搜索引擎来源的关键字

2014-06-09 12:42  2581人阅读  评论 (0)

获取搜索引擎来源关键字的函数

/**
 * Extract the search keywords from the HTTP referer when the visitor arrived
 * from one of the known search engine hosts.
 *
 * Reads $_SERVER['HTTP_REFERER'], looks the referer host up in a fixed
 * "host => query parameter" map and returns the value of that parameter
 * converted to UTF-8.
 *
 * @return string The keywords encoded as UTF-8, or an empty string when the
 *                referer is missing, cannot be parsed, does not belong to a
 *                known search engine, or carries no usable keyword parameter.
 */
function getKeywords() {
    // Search engine host => name of the query parameter holding the keywords.
    static $host_keyword_map = array(
            'www.baidu.com' => 'wd',
            'v.baidu.com' => 'word',
            'image.baidu.com' => 'word',
            'news.baidu.com' => 'word',
            'www.so.com' => 'q',
            'video.so.com' => 'q',
            'image.so.com' => 'q',
            'news.so.com' => 'q',
            'www.sogou.com' => 'query',
            'pic.sogou.com' => 'query',
            'v.sogou.com' => 'query',
    );

    // The referer is client-controlled: it may be absent or malformed.
    if (!isset($_SERVER['HTTP_REFERER']) || !is_string($_SERVER['HTTP_REFERER'])) {
        return '';
    }

    // parse_url() returns false on seriously malformed URLs and omits the
    // 'host' / 'query' components when the URL does not contain them.
    $urls = parse_url($_SERVER['HTTP_REFERER']);
    if (!is_array($urls) || !isset($urls['host'], $urls['query'])) {
        return '';
    }

    // Host names are case-insensitive, the map keys are lower case.
    $host = strtolower($urls['host']);
    if (!isset($host_keyword_map[$host])) {
        return '';
    }
    $key = $host_keyword_map[$host];

    // Check that the keyword parameter exists. parse_str() yields an array
    // for inputs such as "wd[]=x", which cannot be treated as keywords.
    $params = array();
    parse_str($urls['query'], $params);
    if (!isset($params[$key]) || !is_string($params[$key])) {
        return '';
    }
    $keywords = $params[$key];

    // Normalise the encoding: anything that is not valid UTF-8 is treated as
    // GBK, the only other encoding this function handles.
    if (!mb_check_encoding($keywords, 'UTF-8')) {
        $keywords = mb_convert_encoding($keywords, 'UTF-8', 'GBK');
    }

    return $keywords;
}

函数测试

<?php
// Manual smoke test for getKeywords(): feeds a list of sample referers through
// the function and prints one result per line.
//
// The output is declared as plain text because the results are separated by
// "\n" (which a browser collapses in HTML) and because the printed keywords
// come from the referer and must not be interpreted as markup.
header("Content-Type: text/plain; charset=utf-8");

$referers = array(
        // Baidu web search (GBK and UTF-8 encoded keywords).
        'http://www.baidu.com/s?cl=3&wd=%B9%E9%C0%B4&fr=vid1000',
        'http://www.baidu.com/s?tn=92506501_hao_pg&rtt=1&bsst=1&wd=%B9%E9%C0%B4',
        'http://www.baidu.com/link?url=ctBhF7AAau6LwE61pJOEH-ZhgUM7D3YHYMrm6xIXJlDQtMXCiea7gg49s90Q-Qh8wHD8Ano-dPNhUawBBNEEwEbtu8toMF5k1V7Xy850EtlpZyMcS0e_y-SCJp86iM6e&wd=%E5%BD%92%E6%9D%A5&tn=baidu&ie=utf-8&inputT=2980',
        'http://www.baidu.com/link?url=TIn9NR6fwiy6IwwkCcVF8HhHoxVUpHQsyj1YdlQPy2roXKTnSQS_3UxwvyjZ2JPkpxF8-diSoRCSpODUM_jq2K&wd=%E5%BD%92%E6%9D%A5&tn=baidu&ie=utf-8&input',

        // Baidu news, image and video search.
        'http://news.baidu.com/ns?cl=2&rn=20&tn=news&word=%E5%BD%92%E6%9D%A5&ie=utf-8',
        'http://image.baidu.com/i?ct=503316480&z=&tn=baiduimagedetail&ipn=d&word=%E5%BD%92%E6%9D%A5&step_word=&ie=utf-8&in=17668&cl=2&lm=-1&st=&pn=6&rn=1&di=70447907090&ln=1994&fr=news&&fmq=1402285886106_R&ic=&s=&se=&sme=0&tab=&width=&height=&face=&is=&istype=&ist=&jit=&objurl=http%3A%2F%2Fpic31.nipic.com%2F20130713%2F1287761_225159187345_2.jpg',
        'http://v.baidu.com/v?ct=301989888&s=25&ie=utf-8&word=%E5%BD%92%E6%9D%A5',

        // so.com web, video, image and news search.
        'http://www.so.com/s?ie=utf-8&shb=1&src=360sou_newhome&q=%E5%BD%92%E6%9D%A5',
        'http://video.so.com/v?q=%E5%BD%92%E6%9D%A5&src=tab_www',
        'http://image.so.com/v?q=%E5%BD%92%E6%9D%A5&src=tab_video&fromurl=http%3A%2F%2Fndent.oeeee.com%2Fhtml%2F201309%2F16%2F258899.html',
        'http://news.so.com/ns?q=%E5%BD%92%E6%9D%A5&src=tab_video',

        // Sogou web, picture and video search.
        'http://www.sogou.com/web?query=%E5%BD%92%E6%9D%A5&_asf=www.sogou.com&_ast=1402284372&w=01019900&p=40040100&ie=utf8&sut=6558&sst0=1402284372272&lkt=0%2C0%2C0',
        'http://www.sogou.com/web?query=%E5%BD%92%E6%9D%A5&_asf=www.sogou.com&_ast=1402284372&w=01019900&p=40040100&ie=utf8&sut=6558&sst0=1402284372272&lkt=0%2C0%2C0',
        'http://pic.sogou.com/d?query=%B9%E9%C0%B4&mood=0&picformat=0&mode=1&di=0&w=03021800&dr=1&did=1',
        'http://v.sogou.com/v?query=%B9%E9%C0%B4&p=&w=',

        // Known search engine host, but no keyword parameter in the query.
        'http://www.baidu.com/s?aaa=bbb',

        // Known search engine host, but no query string at all.
        'http://www.baidu.com/',

        // Host that is not in the search engine map.
        'http://www.dotcoo.com/',
);

// getKeywords() reads the referer from $_SERVER, so each sample is injected
// there before the call.
foreach ($referers as $r) {
    $_SERVER['HTTP_REFERER'] = $r;
    echo getKeywords(), "\n";
}

搜索引擎市场占有率

http://engine.data.cnzz.com/