网站许久没有更新,以后会经常更新。

本次分享一个使用PHP抓取网站ico的程序,

提供一个网站列表后对网站的ico进行下载抓取,具体代码如下:

<?php
/**
* 更新热站ico
* gao 2015-03-24
*/

error_reporting(0);

// Load the list of sites whose icons should be fetched: one URL per line,
// scheme included, because HTTP and HTTPS sites must be told apart.
// e.g. http://yun.baidu.com https://wx.qq.com
$handle = fopen('site.txt', 'r');

if($handle)
{
    $count = 0;

    // Start every run with fresh result lists (they are appended to below).
    foreach(array('success.ico.txt', 'faild.ico.txt') as $result_file)
    {
        if(file_exists($result_file))
        {
            unlink($result_file);
        }
    }

    // Icons are written to ico/; create it up front so a missing directory
    // does not turn every site into a silent write failure.
    if(!is_dir('ico'))
    {
        mkdir('ico', 0755, true);
    }

    while( ($url = fgets($handle, 4096)) !== false )
    {
        $count++;
        $url = trim($url);

        // Blank lines carry no site; skip them instead of requesting ":///favicon.ico".
        if($url === '')
        {
            continue;
        }

        $scheme = parse_url($url, PHP_URL_SCHEME);
        $host   = parse_url($url, PHP_URL_HOST);

        // A line without scheme or host cannot be fetched; record it as failed.
        if(!$scheme || !$host)
        {
            file_put_contents('faild.ico.txt',  $url . PHP_EOL, FILE_APPEND);
            echo "N{$count} [faild] {$url}" . PHP_EOL;
            continue;
        }

        // Already downloaded on an earlier run: just list it as a success.
        if(file_exists("ico/{$host}.ico"))
        {
            file_put_contents('success.ico.txt',  $url . PHP_EOL, FILE_APPEND);
            echo "N{$count} [continue] {$host}.ico" . PHP_EOL;
            continue;
        }

        // First try the conventional location.
        $ico = curl_get("{$scheme}://{$host}/favicon.ico");

        if(!is_ico_image($ico))
        {
            // Fall back to the home page and look for a custom icon reference.
            $ico_html = curl_get("{$scheme}://{$host}/");

            // The capture may not cross a quote or tag boundary, so an earlier
            // href (e.g. a stylesheet) can no longer be glued onto the icon path.
            if($ico_html && preg_match('/href\s*=\s*["\']([^"\'<>]*?\.ico)/i', $ico_html, $match))
            {
                // Kept separate from $url so the result lists always contain
                // the site URL, exactly as the "already downloaded" branch does.
                $ico_url = $match[1];

                if(substr($ico_url, 0, 2) === '//')
                {
                    // Protocol-relative reference: reuse the site's scheme.
                    $ico_url = $scheme . ':' . $ico_url;
                }
                elseif(!preg_match('#^https?://#i', $ico_url))
                {
                    // Relative reference: the page fetched was "/", so resolve
                    // against the site root whether or not a leading slash is present.
                    $ico_url = $scheme . '://' . $host . '/' . ltrim($ico_url, '/');
                }

                $ico = curl_get($ico_url);
            }
        }

        if(is_ico_image($ico))
        {
            file_put_contents("ico/{$host}.ico", $ico);
            file_put_contents('success.ico.txt',  $url . PHP_EOL, FILE_APPEND);
            echo "N{$count} [success] {$host}.ico" . PHP_EOL;
        }
        else
        {
            file_put_contents('faild.ico.txt',  $url . PHP_EOL, FILE_APPEND);
            echo "N{$count} [faild] {$host}.ico" . PHP_EOL;
        }
    }

    fclose ($handle);

}

/**
 * Tell whether a downloaded payload is an image rather than, say, a 404 page.
 *
 * The bytes are inspected in memory with getimagesizefromstring(), so no
 * scratch file or ico_tmp/ directory is needed.
 *
 * @param mixed $ico Raw response body (string), or false/empty when the download failed.
 * @return bool True when the payload is recognised as an image, false otherwise.
 */
function is_ico_image($ico)
{
    // A failed download (false) or an empty body can never be an image.
    if(!is_string($ico) || $ico === '')
    {
        return false;
    }

    return getimagesizefromstring($ico) !== false;
}

/**
 * Fetch a URL with a simulated browser GET request via cURL.
 *
 * Redirects are followed (up to 5 hops), because sites commonly redirect
 * /favicon.ico, for example from HTTP to HTTPS or to a CDN host.
 *
 * @param string $url Absolute URL to fetch.
 * @return string|false Response body, or false when the request fails.
 */
function curl_get($url)
{
    $ch = curl_init();
    if($ch === false)
    {
        return false;
    }

    curl_setopt_array($ch, array(
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => false, // body only, no response headers
        CURLOPT_RETURNTRANSFER => true,  // return the body instead of printing it
        CURLOPT_HTTPHEADER     => array('X-FORWARDED-FOR:8.8.8.8', 'CLIENT-IP:8.8.8.8'), // simulated headers
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        CURLOPT_REFERER        => $url,
        CURLOPT_CONNECTTIMEOUT => 5,     // do not spend the whole budget on an unreachable host
        CURLOPT_TIMEOUT        => 10,    // overall per-request timeout
    ));

    // Set separately: curl_setopt_array() stops at the first option it cannot
    // apply, and redirect following is the one most likely to be refused.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);

    $data = curl_exec($ch);
    curl_close($ch);
    return $data;
}

 

 

PHP 下载保存图片

/**
 * Send a file to the client as a download (attachment).
 *
 * The source may be a local path or an http(s) URL. The original resolved the
 * source with realpath() only, which returns false for a remote URL.
 *
 * @param string $url      Local path or http(s) URL of the file to send.
 * @param string $filepath Name offered to the client; only its base name is used.
 * @return bool True when the content was sent, false when the source could not be read.
 */
function save_favicon($url, $filepath) {
	$local = realpath($url);

	if ($local !== false && is_file($local)) {
		$data = file_get_contents($local);
	} elseif (preg_match('#^https?://#i', $url)) {
		// Remote source: fetched through the URL stream wrapper.
		$data = file_get_contents($url);
	} else {
		$data = false;
	}

	if ($data === false) {
		return false;
	}

	// Headers can only be set before any output has been sent.
	if (!headers_sent()) {
		// Strip directories and quotes so the client receives a plain file name.
		$name = str_replace('"', '', basename($filepath));

		header('Content-Type: application/octet-stream');
		header('Accept-Ranges: bytes');
		// Content-Length is the standard header; "Accept-Length" is not one.
		header('Content-Length: ' . strlen($data));
		header('Content-Disposition: attachment; filename="' . $name . '"');
	}

	echo $data;
	return true;
}


/**
 * Download a URL with cURL and save the response body under $path.
 *
 * The file name comes from the URL's path component (query string ignored),
 * falling back to the host name when the path has no base name. The target is
 * overwritten, so repeating a download does not append a second copy.
 *
 * @param string $url  URL to download.
 * @param string $path Directory prefix, including its trailing separator; '' means the current directory.
 * @return bool True when the body was written, false when the transfer or the write failed.
 */
function download($url, $path = '')
{
	$ch = curl_init();
	if ($ch === false) {
		return false;
	}
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($ch, CURLOPT_TIMEOUT, 30); // overall per-request timeout
	$file = curl_exec($ch);
	curl_close($ch);

	// A failed transfer must not leave an empty or partial file behind.
	if ($file === false) {
		return false;
	}

	// basename() of the whole URL would yield names such as "?url=9gdj.com".
	$filename = basename((string) parse_url($url, PHP_URL_PATH));
	if ($filename === '') {
		$filename = (string) parse_url($url, PHP_URL_HOST);
	}
	if ($filename === '') {
		return false;
	}

	return file_put_contents($path . $filename, $file) !== false;
}


// Example: offer one favicon to the client as a download.
save_favicon('https://api.byi.pw/favicon/?url=9gdj.com', '../favicon/9gdj.com.ico');

// Example: fetch two favicons in turn, using download()'s default target directory.
array_map('download', array(
	'https://api.byi.pw/favicon/?url=9gdj.com',
	'https://mimvp.com/favicon.ico',
));

 

 

URL是远程的完整图片地址

/**
 * Fetch an image and save it to a local file.
 *
 * The extension is taken from the URL's path (query string ignored) and
 * compared case-insensitively; only gif, jpg and bmp are accepted.
 *
 * @param string $url      Full address of the image; must not be empty.
 * @param string $filename Name to save as. When empty, "<unix timestamp>.<ext>" is
 *                         used, written relative to the current working directory.
 * @return string|false The saved file name, or false when the URL is empty, the
 *                      format is not accepted, or the image cannot be read or written.
 */
function GrabImage($url, $filename=""){
	// An empty URL cannot be fetched.
	if($url == ""){return false;}

	// Prefer the path component so "logo.gif?v=2" still yields "gif".
	$path = parse_url($url, PHP_URL_PATH);
	$ext = strtolower(pathinfo($path ? $path : $url, PATHINFO_EXTENSION));

	if(!in_array($ext, array('gif', 'jpg', 'bmp'), true)){
		echo "格式不支持!";
		return false;
	}

	// No name given: fall back to a timestamp-based name.
	if($filename == ""){
		$filename = time() . '.' . $ext;
	}

	// Read the whole image; bail out before creating any file if that fails.
	$img = file_get_contents($url);
	if($img === false){
		return false;
	}

	// Overwrite rather than append, so saving twice does not corrupt the file.
	if(file_put_contents($filename, $img) === false){
		return false;
	}
	return $filename;
}
// Test: fetch a remote GIF and save it under the name mimvp.gif
GrabImage("http://www.jb51.net/images/logo.gif", "mimvp.gif"); 

 

 

php图片裁剪,叠加,生成ico

// **************************************** //
// Purpose: overlay one PNG on top of another and save the result as a PNG.
// Params:  $dst     path of the background PNG
//          $src     path of the PNG to overlay
//          $newfile path the merged PNG is written to
//          $left    x offset of the overlay from the background's left edge;
//                   null (default) aligns the overlay with the right edge
//          $top     y offset of the overlay from the background's top edge;
//                   null (default) aligns the overlay with the bottom edge
// Returns: true when the merged PNG was written, false when either input
//          could not be loaded or the output could not be saved.
// **************************************** //
function superimposedPng($dst,$src,$newfile,$left=null,$top=null)
{
    // Load the background and the overlay (watermark) image.
    $dst_im = imagecreatefrompng($dst);
    $src_im = imagecreatefrompng($src);
    if(!$dst_im || !$src_im)
    {
        return false;
    }

    // Keep the background's alpha channel when the result is written.
    imagesavealpha($dst_im, true);

    $src_w = imagesx($src_im);
    $src_h = imagesy($src_im);

    // Only a missing offset selects the default corner. An explicit 0 is a
    // valid position, which an empty() check would have thrown away.
    if($left === null)
    {
        $left = imagesx($dst_im) - $src_w;
    }
    if($top === null)
    {
        $top = imagesy($dst_im) - $src_h;
    }

    // Copy the whole overlay onto the background at ($left, $top).
    imagecopy($dst_im, $src_im, $left, $top, 0, 0, $src_w, $src_h);

    // Write the merged image and release both GD images.
    $saved = imagepng($dst_im, $newfile);
    imagedestroy($dst_im);
    imagedestroy($src_im);

    return $saved;
}



// **************************************** //
// Purpose: convert a PNG into a square ICO file.
// Params:  $src      path of the source PNG
//          $size     edge length of the icon in pixels
//          $filename output name without extension; ".ico" is appended
// Returns: true when the ICO file was written, false when the PNG could not
//          be loaded, the canvas could not be created, or the write failed.
// Requires phpthumb.ico.php (class phpthumb_ico) to be reachable by include.
// **************************************** //
function covertPngToIco($src,$size,$filename)
{
    $im = imagecreatefrompng($src);
    if(!$im)
    {
        return false;
    }

    $resize_im = imagecreatetruecolor($size, $size);
    if(!$resize_im)
    {
        return false;
    }

    // Turn blending off so source pixels, alpha included, replace the canvas
    // pixels, and mark the colour allocated as (0,0,0, alpha 0) as transparent.
    imagealphablending($resize_im, false);
    imagecolortransparent($resize_im, imagecolorallocatealpha($resize_im, 0, 0, 0,0));

    // Scale the whole source image onto the $size x $size canvas.
    imagecopyresampled($resize_im, $im, 0, 0, 0, 0, $size, $size, imagesx($im), imagesy($im));

    // include_once: a plain include would redeclare phpthumb_ico, a fatal
    // error, the second time this function is called.
    include_once "phpthumb.ico.php";
    $icon = new phpthumb_ico();
    // NOTE(review): GD2ICOstring() presumably returns the ICO file bytes for
    // the given GD images - confirm against phpthumb.ico.php.
    $icon_data = $icon->GD2ICOstring(array($resize_im));

    // Save the icon next to the given base name.
    return file_put_contents($filename . ".ico", $icon_data) !== false;
}



// **************************************** //
// Purpose: shrink a GIF, JPEG or PNG so it fits inside a bounding box, keeping
//          its aspect ratio. Images that already fit are re-saved unscaled.
// Params:  $img         path of the source image
//          $w           maximum width in pixels
//          $h           maximum height in pixels
//          $newfilename path the result is written to (same format as the source)
// Returns: $newfilename on success, false when GD is missing, the file is not
//          a supported image, or the result could not be written.
// **************************************** //
function resizeImage($img, $w, $h, $newfilename) {

    // Check if the GD extension is loaded.
    if (!extension_loaded('gd') && !extension_loaded('gd2')) {
        trigger_error("GD is not loaded", E_USER_WARNING);
        return false;
    }

    // Read dimensions and type; getimagesize() returns false for non-images.
    $imgInfo = getimagesize($img);
    if ($imgInfo === false) {
        trigger_error('Unsupported filetype!', E_USER_WARNING);
        return false;
    }
    $srcWidth  = $imgInfo[0];
    $srcHeight = $imgInfo[1];
    $type      = $imgInfo[2];

    switch ($type) {
        case IMAGETYPE_GIF:  $im = imagecreatefromgif($img);  break;
        case IMAGETYPE_JPEG: $im = imagecreatefromjpeg($img); break;
        case IMAGETYPE_PNG:  $im = imagecreatefrompng($img);  break;
        default:
            // Stop here: continuing would use an undefined source image.
            trigger_error('Unsupported filetype!', E_USER_WARNING);
            return false;
    }
    if (!$im) {
        return false;
    }

    if ($srcWidth <= $w && $srcHeight <= $h) {
        // Already inside the box: keep the original dimensions.
        $nWidth  = $srcWidth;
        $nHeight = $srcHeight;
    } else {
        // Scale by the SMALLER ratio so both sides fit; the larger ratio
        // would make one side overflow the box.
        $scale   = min($w / $srcWidth, $h / $srcHeight);
        $nWidth  = max(1, (int) round($srcWidth * $scale));
        $nHeight = max(1, (int) round($srcHeight * $scale));
    }

    $newImg = imagecreatetruecolor($nWidth, $nHeight);

    // GIF and PNG may carry transparency: start from a fully transparent
    // canvas and keep the alpha channel when saving.
    if ($type == IMAGETYPE_GIF || $type == IMAGETYPE_PNG) {
        imagealphablending($newImg, false);
        imagesavealpha($newImg, true);
        $transparent = imagecolorallocatealpha($newImg, 255, 255, 255, 127);
        imagefilledrectangle($newImg, 0, 0, $nWidth, $nHeight, $transparent);
    }
    imagecopyresampled($newImg, $im, 0, 0, 0, 0, $nWidth, $nHeight, $srcWidth, $srcHeight);

    // Write the result in the same format as the source.
    switch ($type) {
        case IMAGETYPE_GIF:  $saved = imagegif($newImg, $newfilename);  break;
        case IMAGETYPE_JPEG: $saved = imagejpeg($newImg, $newfilename); break;
        default:             $saved = imagepng($newImg, $newfilename);  break;
    }
    if (!$saved) {
        trigger_error('Failed resize image!', E_USER_WARNING);
        return false;
    }

    return $newfilename;
}

// Original (background) image
$logo = "su.png";
// Step 1: resize ok.png to fit a 30x30 box, writing the result back to ok.png.
resizeImage("ok.png","30","30","ok.png");

// Step 2: overlay the resized ok.png onto the background and save the result as su.png.
superimposedPng($logo, "ok.png","su.png");

// Step 3: convert su.png into a 64x64 icon; ".ico" is appended, giving ok.ico.
covertPngToIco("su.png",64,"ok");

 

以上代码是对 PHP 图片(image)缩放、叠加、生成 ico 方法的总结,主要处理的是 png 图片,并对 png 图片生成后变黑、png 透明等问题进行了处理。如需处理 jpeg、gif 图片,需要把 imagecreatefrompng 改为相应的 imagecreatefromjpeg、imagecreatefromgif 方法。

 

以下是编写过程中参考的网址,供各位查阅:

png透明处理  http://www.phpweblog.net/young40/archive/2008/11/26/6124.html

png图片变黑 http://hi.baidu.com/guantong_gt/blog/item/f35bbade73a6341d48540378.html

png生成ico   http://www.jb51.net/article/12458.htm

png缩放图片 http://www.akemapa.com/2008/07/10/php-gd-resize-transparent-image-png-gif/

 

 

参考推荐

PHP 获取网页标题(title)、描述(description)、关键字(keywords)等meta信息

PHP + Selenium + WebDriver 抓取米扑科技首页

Python+Selenium2 搭建自动化测试环境

selenium实现Xvfb在linux上无界面运行

Python + Selenium2 + Chrome 爬取网页

selenium+php-webdriver实现抓取淘宝页面

PHP 文件操作常用函数

PHP 常用函数总结(数组,字符串,时间,文件操作)

PHP 路径详解 dirname,realpath,__FILE__,getcwd

PHP 下载保存文件到本地

PHP下载远程图片

PHP抓取网站ico图标

PHP 文件导入 require, require_once, include, include_once 区别