PHP抓取网站ico图标
网站许久没有更新,以后会经常更新,
本次分享一个使用PHP抓取网站ico的程序,
提供一个网站列表后对网站的ico进行下载抓取,具体代码如下:
<?php
/**
 * Refresh the cached favicon (.ico) files for a list of popular sites.
 * gao 2015-03-24
 *
 * Input:  site.txt         - one site URL per line. The scheme is required so
 *                            HTTP and HTTPS sites can be told apart, e.g.
 *                            http://yun.baidu.com or https://wx.qq.com
 * Output: ico/{host}.ico   - one icon file per site
 *         success.ico.txt  - site URLs whose icon is available
 *         faild.ico.txt    - site URLs whose icon could not be fetched
 */

// Kept from the original script: silence PHP diagnostics so that only the
// per-site progress lines are printed.
error_reporting(0);

$handle = fopen('site.txt', 'r');
if ($handle) {
    $count = 0;

    // Start every run with fresh result lists.
    foreach (array('success.ico.txt', 'faild.ico.txt') as $list_file) {
        if (file_exists($list_file)) {
            unlink($list_file);
        }
    }

    // The icons are written below ico/, so make sure it exists.
    if (!is_dir('ico')) {
        mkdir('ico', 0777, true);
    }

    while (($line = fgets($handle, 4096)) !== false) {
        // $count is the line number in site.txt, so blank lines still count.
        $count++;
        $site = trim($line);
        if ($site === '') {
            continue;
        }

        $scheme = parse_url($site, PHP_URL_SCHEME);
        $host   = parse_url($site, PHP_URL_HOST);
        $port   = parse_url($site, PHP_URL_PORT);

        // A line without scheme or host cannot be fetched at all.
        if (!$scheme || !$host) {
            file_put_contents('faild.ico.txt', $site . PHP_EOL, FILE_APPEND);
            echo "N{$count} [faild] {$site}" . PHP_EOL;
            continue;
        }
        $origin = "{$scheme}://{$host}" . ($port ? ":{$port}" : '');

        // Already downloaded on an earlier run: record it and move on.
        if (file_exists("ico/{$host}.ico")) {
            file_put_contents('success.ico.txt', $site . PHP_EOL, FILE_APPEND);
            echo "N{$count} [continue] {$host}.ico" . PHP_EOL;
            continue;
        }

        // First try the conventional location.
        $ico = curl_get("{$origin}/favicon.ico");

        if (!is_ico_image($ico)) {
            // Fall back to the home page and look for a custom .ico reference.
            // The character class keeps the match inside one attribute value.
            $html = curl_get("{$origin}/");
            if (is_string($html)
                && preg_match('/href\s*=\s*["\']([^"\'<>]+?\.ico)/i', $html, $match)
            ) {
                $ico = curl_get(resolve_ico_url($match[1], $scheme, $origin));
            }
        }

        // Always log the site URL (not the icon URL) so both result lists can
        // be fed back in as site.txt.
        if (is_ico_image($ico)) {
            file_put_contents("ico/{$host}.ico", $ico);
            file_put_contents('success.ico.txt', $site . PHP_EOL, FILE_APPEND);
            echo "N{$count} [success] {$host}.ico" . PHP_EOL;
        } else {
            file_put_contents('faild.ico.txt', $site . PHP_EOL, FILE_APPEND);
            echo "N{$count} [faild] {$host}.ico" . PHP_EOL;
        }
    }

    fclose($handle);
}

/**
 * Turn an icon reference found in a home page into an absolute URL.
 *
 * @param string $href   Value taken from the href attribute.
 * @param string $scheme Scheme of the site ("http" or "https").
 * @param string $origin Site origin, e.g. "https://example.com".
 * @return string Absolute URL of the icon.
 */
function resolve_ico_url($href, $scheme, $origin)
{
    // Already absolute.
    if (preg_match('#^https?://#i', $href)) {
        return $href;
    }
    // Protocol-relative reference: reuse the scheme of the site.
    if (substr($href, 0, 2) === '//') {
        return $scheme . ':' . $href;
    }
    // Root-relative path.
    if (substr($href, 0, 1) === '/') {
        return $origin . $href;
    }
    // Plain relative path; the page it came from is "/", so leading "./" and
    // "../" segments all resolve against the site root.
    return $origin . '/' . preg_replace('#^(\.{1,2}/)+#', '', $href);
}

/**
 * Tell whether the downloaded bytes are an image rather than, for example,
 * the HTML of a 404 page.
 *
 * @param string|false $ico Raw response body, or false when the request failed.
 * @return bool True when getimagesizefromstring() recognises the data.
 */
function is_ico_image($ico)
{
    if (!is_string($ico) || $ico === '') {
        return false;
    }
    // Inspect the bytes in memory; no temporary file or directory is needed.
    return getimagesizefromstring($ico) !== false;
}

/**
 * Fetch a URL with a browser-like GET request through cURL.
 *
 * @param string $url URL to request.
 * @return string|false Response body, or false on failure.
 */
function curl_get($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);         // body only, no response headers
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);  // return the body instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);  // follow redirects, e.g. http -> https
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('X-FORWARDED-FOR:8.8.8.8', 'CLIENT-IP:8.8.8.8')); // simulated headers
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11");
    curl_setopt($ch, CURLOPT_REFERER, $url);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);           // overall timeout per request, in seconds

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}
PHP 下载保存图片
// Download and save images

/**
 * Fetch the raw bytes of a remote resource with cURL.
 *
 * @param string $url     URL to request.
 * @param int    $timeout Connect and overall timeout, in seconds.
 * @return string|false Response body, or false when the request failed.
 */
function fetch_remote_file($url, $timeout = 30)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); // overall timeout per request

    $body = curl_exec($ch);
    curl_close($ch);

    return $body;
}

/**
 * Send a file to the client as an attachment download.
 *
 * @param string $url      Local path or http(s) URL of the file to send.
 * @param string $filepath Name offered to the client; only its base name is
 *                         used in the Content-Disposition header.
 * @return bool True when the file was read and sent, false otherwise.
 */
function save_favicon($url, $filepath)
{
    if (preg_match('#^https?://#i', $url)) {
        // realpath() only understands local paths, so remote files are fetched.
        $data = fetch_remote_file($url);
    } else {
        $local = realpath($url);
        $data = ($local !== false && is_file($local)) ? file_get_contents($local) : false;
    }

    if ($data === false || $data === '') {
        return false;
    }

    // Strip characters that would break the quoted header value.
    $download_name = str_replace(array('"', "\r", "\n"), '', basename($filepath));

    if (!headers_sent()) {
        header('Content-Type: application/octet-stream');
        // Content-Length is the standard header for the body size.
        header('Content-Length: ' . strlen($data));
        header('Content-Disposition: attachment; filename="' . $download_name . '"');
    }

    echo $data;

    return true;
}

/**
 * Download a URL and store it below $path.
 *
 * The file name is the last segment of the URL path, so a query string never
 * ends up in the name; when the path has no segment the host name is used.
 *
 * @param string $url  URL to download.
 * @param string $path Target directory prefix, including the trailing slash;
 *                     empty means the current directory.
 * @return string|false Path of the written file, or false on failure.
 */
function download($url, $path = '')
{
    $data = fetch_remote_file($url);
    if ($data === false || $data === '') {
        return false;
    }

    $filename = basename((string) parse_url($url, PHP_URL_PATH));
    if ($filename === '') {
        $filename = (string) parse_url($url, PHP_URL_HOST);
    }
    if ($filename === '') {
        return false;
    }

    // Overwrite instead of appending, so repeated downloads stay valid files.
    $target = $path . $filename;
    if (file_put_contents($target, $data) === false) {
        return false;
    }

    return $target;
}

save_favicon("https://api.byi.pw/favicon/?url=9gdj.com", "../favicon/9gdj.com.ico");
download("https://api.byi.pw/favicon/?url=9gdj.com");
download("https://mimvp.com/favicon.ico");
URL是远程的完整图片地址
/**
 * Download an image and save it to a local file.
 *
 * @param string $url      Full address of the image; must not be empty. The
 *                         extension is taken from the URL path, so a query
 *                         string does not interfere with the check.
 * @param string $filename Name to save the image as. When empty, the current
 *                         timestamp plus the image extension is used, which
 *                         puts the file in the current working directory.
 * @return string|false The file name that was written, or false when the URL
 *                      is empty, the format is unsupported, or the download
 *                      or write failed.
 */
function GrabImage($url, $filename = "")
{
    if (!is_string($url) || $url === '') {
        return false;
    }

    // Work on the path component only and compare case-insensitively.
    $url_path = parse_url($url, PHP_URL_PATH);
    $ext = '.' . strtolower(pathinfo(is_string($url_path) ? $url_path : $url, PATHINFO_EXTENSION));

    if (!in_array($ext, array('.gif', '.jpg', '.jpeg', '.png', '.bmp'), true)) {
        echo "格式不支持!";
        return false;
    }

    // Default name: timestamp plus extension.
    if ($filename == "") {
        $filename = time() . $ext;
    }

    // Fetch the bytes directly instead of capturing readfile() output.
    $img = file_get_contents($url);
    if ($img === false) {
        return false;
    }

    // Overwrite rather than append, so a second run cannot corrupt the image.
    if (file_put_contents($filename, $img) === false) {
        return false;
    }

    return $filename;
}

// Demo call
GrabImage("http://www.jb51.net/images/logo.gif", "mimvp.gif");
php图片裁剪,叠加,生成ico
/**
 * Overlay one PNG on top of another and save the result as a PNG.
 *
 * @param string   $dst     Path of the background PNG.
 * @param string   $src     Path of the PNG to draw on top.
 * @param string   $newfile Path the merged PNG is written to.
 * @param int|null $left    Distance from the left edge of the background;
 *                          null right-aligns the overlay.
 * @param int|null $top     Distance from the top edge of the background;
 *                          null bottom-aligns the overlay.
 * @return bool True when the merged image was written, false otherwise.
 */
function superimposedPng($dst, $src, $newfile, $left = null, $top = null)
{
    if (!function_exists('imagecreatefrompng')) {
        trigger_error("GD is not loaded", E_USER_WARNING);
        return false;
    }

    // Load the background and the overlay.
    $dst_im = imagecreatefrompng($dst);
    $src_im = imagecreatefrompng($src);
    if ($dst_im === false || $src_im === false) {
        trigger_error('Unable to load PNG image', E_USER_WARNING);
        return false;
    }
    imagesavealpha($dst_im, true);

    $src_w = imagesx($src_im);
    $src_h = imagesy($src_im);

    // Only a missing offset falls back to the default; 0 is a valid offset.
    if ($left === null || $left === '') {
        $left = imagesx($dst_im) - $src_w;
    }
    if ($top === null || $top === '') {
        $top = imagesy($dst_im) - $src_h;
    }

    // Draw the overlay and write the merged picture.
    imagecopy($dst_im, $src_im, (int) $left, (int) $top, 0, 0, $src_w, $src_h);
    $written = imagepng($dst_im, $newfile);

    imagedestroy($dst_im);
    imagedestroy($src_im);

    return $written;
}

/**
 * Convert a PNG into a square .ico file.
 *
 * Relies on the phpthumb_ico class from phpthumb.ico.php, which is loaded on
 * demand from the include path.
 *
 * @param string $src      Path of the source PNG.
 * @param int    $size     Edge length of the icon in pixels.
 * @param string $filename Target name without extension; ".ico" is appended.
 * @return string|false Name of the written .ico file, or false on failure.
 */
function covertPngToIco($src, $size, $filename)
{
    if (!function_exists('imagecreatefrompng')) {
        trigger_error("GD is not loaded", E_USER_WARNING);
        return false;
    }

    $im = imagecreatefrompng($src);
    if ($im === false) {
        trigger_error('Unable to load PNG image', E_USER_WARNING);
        return false;
    }

    $size = (int) $size;
    if ($size < 1) {
        trigger_error('Icon size must be positive', E_USER_WARNING);
        return false;
    }

    $resize_im = imagecreatetruecolor($size, $size);
    if ($resize_im === false) {
        return false;
    }

    // Copy the alpha channel as-is so transparent areas do not turn black.
    imagealphablending($resize_im, false);
    imagecolortransparent($resize_im, imagecolorallocatealpha($resize_im, 0, 0, 0, 0));
    imagecopyresampled($resize_im, $im, 0, 0, 0, 0, $size, $size, imagesx($im), imagesy($im));

    // Load the converter once; a plain include would redeclare the class on
    // the second call.
    if (!class_exists('phpthumb_ico')) {
        include_once "phpthumb.ico.php";
    }
    if (!class_exists('phpthumb_ico')) {
        trigger_error('phpthumb_ico class is not available', E_USER_WARNING);
        return false;
    }

    $icon = new phpthumb_ico();
    $icon_data = $icon->GD2ICOstring(array($resize_im));

    imagedestroy($im);
    imagedestroy($resize_im);

    // Save the icon.
    $filename = $filename . ".ico";
    if (file_put_contents($filename, $icon_data) === false) {
        return false;
    }

    return $filename;
}

/**
 * Resize a GIF, JPEG or PNG so that it fits inside a bounding box, keeping
 * the aspect ratio. Images that already fit are re-saved at their own size.
 *
 * @param string $img         Path of the source image.
 * @param int    $w           Maximum width of the result.
 * @param int    $h           Maximum height of the result.
 * @param string $newfilename Path the result is written to, in the same
 *                            format as the source.
 * @return string|false $newfilename on success, false on failure.
 */
function resizeImage($img, $w, $h, $newfilename)
{
    // Check if the GD extension is loaded.
    if (!extension_loaded('gd') && !extension_loaded('gd2')) {
        trigger_error("GD is not loaded", E_USER_WARNING);
        return false;
    }

    // Read size and type of the source image.
    $imgInfo = getimagesize($img);
    if ($imgInfo === false) {
        trigger_error('Unable to read image', E_USER_WARNING);
        return false;
    }
    list($srcWidth, $srcHeight, $type) = $imgInfo;

    switch ($type) {
        case IMAGETYPE_GIF:
            $im = imagecreatefromgif($img);
            break;
        case IMAGETYPE_JPEG:
            $im = imagecreatefromjpeg($img);
            break;
        case IMAGETYPE_PNG:
            $im = imagecreatefrompng($img);
            break;
        default:
            // Stop here; there is no image resource to work with.
            trigger_error('Unsupported filetype!', E_USER_WARNING);
            return false;
    }
    if ($im === false) {
        return false;
    }

    $w = (int) $w;
    $h = (int) $h;
    if ($w < 1 || $h < 1 || $srcWidth < 1 || $srcHeight < 1) {
        trigger_error('Invalid image dimensions', E_USER_WARNING);
        return false;
    }

    if ($srcWidth <= $w && $srcHeight <= $h) {
        // The image already fits: keep its size.
        $nWidth = $srcWidth;
        $nHeight = $srcHeight;
    } else {
        // Use the smaller ratio so that both sides stay inside the box.
        $scale = min($w / $srcWidth, $h / $srcHeight);
        $nWidth = max(1, (int) round($srcWidth * $scale));
        $nHeight = max(1, (int) round($srcHeight * $scale));
    }

    $newImg = imagecreatetruecolor($nWidth, $nHeight);
    if ($newImg === false) {
        return false;
    }

    // GIF and PNG may be transparent: start from a fully transparent canvas.
    if ($type == IMAGETYPE_GIF || $type == IMAGETYPE_PNG) {
        imagealphablending($newImg, false);
        imagesavealpha($newImg, true);
        $transparent = imagecolorallocatealpha($newImg, 255, 255, 255, 127);
        imagefilledrectangle($newImg, 0, 0, $nWidth, $nHeight, $transparent);
    }
    imagecopyresampled($newImg, $im, 0, 0, 0, 0, $nWidth, $nHeight, $srcWidth, $srcHeight);

    // Write the result in the same format as the source.
    switch ($type) {
        case IMAGETYPE_GIF:
            $saved = imagegif($newImg, $newfilename);
            break;
        case IMAGETYPE_JPEG:
            $saved = imagejpeg($newImg, $newfilename);
            break;
        default:
            $saved = imagepng($newImg, $newfilename);
            break;
    }

    imagedestroy($im);
    imagedestroy($newImg);

    if (!$saved) {
        trigger_error('Failed resize image!', E_USER_WARNING);
        return false;
    }

    return $newfilename;
}

// Source image
$logo = "su.png";
resizeImage("ok.png", "30", "30", "ok.png");
superimposedPng($logo, "ok.png", "su.png");
covertPngToIco("su.png", 64, "ok");
以上代码是总结的php图片(image)缩放、叠加、生成ico的方法,代码中主要处理png的情况,并对png图片生成后变黑、png透明的问题进行了处理。如果需要处理jpeg、gif图片,需要把imagecreatefrompng改为相应的imagecreatefromjpeg、imagecreatefromgif方法。
在使用过程中参考网址,以方便各位参考
png透明处理 http://www.phpweblog.net/young40/archive/2008/11/26/6124.html
png图片变黑 http://hi.baidu.com/guantong_gt/blog/item/f35bbade73a6341d48540378.html
png生成ico http://www.jb51.net/article/12458.htm
png缩放图片 http://www.akemapa.com/2008/07/10/php-gd-resize-transparent-image-png-gif/
参考推荐:
PHP 获取网页标题(title)、描述(description)、关键字(keywords)等meta信息
PHP + Selenium + WebDriver 抓取米扑科技首页
Python + Selenium2 + Chrome 爬取网页
selenium+php-webdriver实现抓取淘宝页面
PHP 路径详解 dirname,realpath,__FILE__,getcwd
PHP 文件导入 require, require_once, include, include_once 区别
版权所有: 本文系米扑博客原创、转载、摘录,或修订后发表,最后更新于 2019-12-07 18:22:43
侵权处理: 本个人博客,不盈利,若侵犯了您的作品权,请联系博主删除,莫恶意,索钱财,感谢!
转载注明: PHP抓取网站ico图标 (米扑博客)
确实你好久没有更新网站了,哈哈