PHP抓取网站ico图标
网站许久没有更新,以后会经常更新,
本次分享一个使用PHP抓取网站ico的程序,
提供一个网站列表后对网站的ico进行下载抓取,具体代码如下:
<?php
/**
* 更新热站ico
* gao 2015-03-24
*/
// All PHP diagnostics are silenced for this batch run; failures are reported
// only through the "[faild]" console lines and faild.ico.txt.
error_reporting(0);

// Load the crawl list: one site URL per line, scheme included so that HTTP and
// HTTPS sites can be told apart, e.g.
//   http://yun.baidu.com
//   https://wx.qq.com
$handle = fopen('site.txt', 'r');
if ($handle) {
    $count = 0;

    // Start every run with fresh result lists; a missing list is not an error.
    foreach (array('success.ico.txt', 'faild.ico.txt') as $list_file) {
        if (file_exists($list_file)) {
            unlink($list_file);
        }
    }

    // Icons are stored as ico/<host>.ico, so the directory has to exist.
    if (!is_dir('ico')) {
        mkdir('ico', 0777, true);
    }

    while (($url = fgets($handle, 4096)) !== false) {
        $count++;
        $url = trim($url);
        if ($url === '') {
            // Blank line in the list: nothing to fetch.
            continue;
        }

        $scheme = parse_url($url, PHP_URL_SCHEME);
        $host = parse_url($url, PHP_URL_HOST);
        $port = parse_url($url, PHP_URL_PORT);
        if (!$scheme || !$host) {
            // Without scheme and host no request URL can be built.
            file_put_contents('faild.ico.txt', $url . PHP_EOL, FILE_APPEND);
            echo "N{$count} [faild] {$url}" . PHP_EOL;
            continue;
        }

        // Already downloaded by an earlier run: record it and move on.
        if (file_exists("ico/{$host}.ico")) {
            file_put_contents('success.ico.txt', $url . PHP_EOL, FILE_APPEND);
            echo "N{$count} [continue] {$host}.ico" . PHP_EOL;
            continue;
        }

        $base = "{$scheme}://{$host}" . ($port ? ":{$port}" : '');

        // First try the conventional location.
        $ico = curl_get("{$base}/favicon.ico");
        if (!is_ico_image($ico)) {
            // Fall back to the home page and look for a custom *.ico reference.
            // The character class keeps the match inside one attribute value and
            // the look-ahead makes sure ".ico" really ends the file name.
            $ico_html = curl_get("{$base}/");
            $pattern = '/href\s*=\s*["\']([^"\'<>\s]+?\.ico)(?=["\'?#])/i';
            if ($ico_html && preg_match($pattern, $ico_html, $match)) {
                $href = $match[1];
                if (preg_match('#^https?://#i', $href)) {
                    $ico_url = $href;                   // absolute URL
                } elseif (substr($href, 0, 2) === '//') {
                    $ico_url = $scheme . ':' . $href;   // protocol-relative URL
                } elseif ($href[0] === '/') {
                    $ico_url = $base . $href;           // root-relative path
                } else {
                    $ico_url = $base . '/' . $href;     // relative to the home page
                }
                $ico = curl_get($ico_url);
            }
        }

        // $url is never overwritten, so both result lists always contain the
        // site URL exactly as it appears in site.txt.
        if (is_ico_image($ico)) {
            file_put_contents("ico/{$host}.ico", $ico);
            file_put_contents('success.ico.txt', $url . PHP_EOL, FILE_APPEND);
            echo "N{$count} [success] {$host}.ico" . PHP_EOL;
        } else {
            file_put_contents('faild.ico.txt', $url . PHP_EOL, FILE_APPEND);
            echo "N{$count} [faild] {$host}.ico" . PHP_EOL;
        }
    }
    fclose($handle);
}
/**
 * Tell whether a downloaded payload is really an image.
 *
 * A favicon request can come back as an HTML error page (or as false / an
 * empty string when the transfer failed), so the bytes are probed for a
 * recognisable image header before they are stored.
 *
 * The probe runs in memory via getimagesizefromstring() (PHP >= 5.4), so it
 * needs no scratch file and does not depend on an ico_tmp/ directory.
 *
 * @param string|false|null $ico Raw response body.
 * @return bool True when the payload is recognised as an image.
 */
function is_ico_image($ico)
{
    if (!is_string($ico) || $ico === '') {
        return false;
    }

    return getimagesizefromstring($ico) !== false;
}
/**
 * Fetch a URL with a browser-like GET request.
 *
 * Redirects are followed (at most 5 hops) so that a site answering with an
 * http -> https or www redirect yields the final document instead of the
 * 30x response body.
 *
 * @param string $url Address to fetch.
 * @return string|false Response body, or false when the transfer failed.
 */
function curl_get($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);        // body only, no response headers
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('X-FORWARDED-FOR:8.8.8.8', 'CLIENT-IP:8.8.8.8')); // spoofed client headers
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11");
    curl_setopt($ch, CURLOPT_REFERER, $url);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);          // per-request timeout in seconds
    // Set individually so that a refusal of this option does not affect the
    // options configured above.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}
PHP 下载保存图片
/**
 * Send a file to the HTTP client as an attachment download.
 *
 * Despite its name this function does not write anything to disk: it emits
 * download headers followed by the file body.
 *
 * @param string $url      Local path or http(s) URL of the file to send.
 *                         Reading a URL relies on PHP's URL stream wrappers.
 * @param string $filepath Name offered to the client; only its base name is
 *                         used in the Content-Disposition header.
 * @return void Nothing is sent when the source cannot be read.
 */
function save_favicon($url, $filepath) {
    // realpath() only resolves local paths and returns false for a URL, so
    // remote sources are passed through unchanged.
    $local = realpath($url);
    if ($local !== false && is_file($local)) {
        $source = $local;
    } elseif (preg_match('#^https?://#i', $url)) {
        $source = $url;
    } else {
        return;
    }

    // Read first so that the length is known and no headers go out for a
    // source that turns out to be unreadable.
    $data = file_get_contents($source);
    if ($data === false) {
        return;
    }

    if (!headers_sent()) {
        // Strip characters that would break the quoted filename parameter.
        $download_name = str_replace(array('"', "\r", "\n"), '', basename($filepath));
        header('Content-Type: application/octet-stream');
        header('Content-Length: ' . strlen($data));
        header('Content-Disposition: attachment; filename="' . $download_name . '"');
    }

    echo $data;
}
/**
 * Download a remote file with curl and store it on disk.
 *
 * The local file name is the base name of the URL's path component (the query
 * string is ignored); when the path has no base name the host name is used.
 * An existing file is overwritten rather than appended to, so repeating a
 * download cannot corrupt the stored copy.
 *
 * @param string $url  Address of the file to download.
 * @param string $path Optional prefix for the target, e.g. "icons/".
 * @return string|false Path of the written file, or false on failure.
 */
function download($url, $path = '')
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);          // per-request timeout in seconds
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // store the final document, not a 30x body
    $file = curl_exec($ch);
    curl_close($ch);

    // Do not create an empty file for a failed or empty transfer.
    if ($file === false || $file === '') {
        return false;
    }

    $filename = basename((string) parse_url($url, PHP_URL_PATH));
    if ($filename === '') {
        $filename = (string) parse_url($url, PHP_URL_HOST);
    }
    if ($filename === '') {
        $filename = 'download';
    }

    $target = $path . $filename;
    if (file_put_contents($target, $file) === false) {
        return false;
    }

    return $target;
}
// Usage example: hand one favicon to save_favicon(), then run download() on
// two remote addresses in turn.
save_favicon("https://api.byi.pw/favicon/?url=9gdj.com", "../favicon/9gdj.com.ico");
array_map('download', array(
    "https://api.byi.pw/favicon/?url=9gdj.com",
    "https://mimvp.com/favicon.ico",
));
URL是远程的完整图片地址
/**
 * Fetch an image and save it to a local file.
 *
 * Only gif, jpg and bmp sources are accepted. The extension is taken from the
 * path component of $url, compared case-insensitively, so a query string or
 * an upper-case extension does not cause a false rejection.
 *
 * @param string $url      Full address of the image; must not be empty.
 * @param string $filename Target file name. When empty, "<unix time>.<ext>"
 *                         is used, relative to the current working directory.
 * @return string|false The file name written, or false when $url is empty,
 *                      the format is unsupported, or reading/writing failed.
 */
function GrabImage($url, $filename = "")
{
    if ($url == "") {
        return false;
    }

    $extension = strtolower(pathinfo((string) parse_url($url, PHP_URL_PATH), PATHINFO_EXTENSION));
    if (!in_array($extension, array('gif', 'jpg', 'bmp'), true)) {
        echo "格式不支持!";
        return false;
    }

    if ($filename == "") {
        // No name given: derive one from the current timestamp.
        $filename = time() . '.' . $extension;
    }

    $img = file_get_contents($url);
    if ($img === false) {
        // Do not leave an empty file behind for a source that could not be read.
        return false;
    }

    // Overwrite instead of append: appending to an existing image corrupts it.
    if (file_put_contents($filename, $img) === false) {
        return false;
    }

    return $filename;
}
// Test call: grab a remote GIF and store it under the relative name "mimvp.gif".
GrabImage("http://www.jb51.net/images/logo.gif", "mimvp.gif");
php图片裁剪,叠加,生成ico
/**
 * Overlay one PNG on top of another and save the result as a PNG.
 *
 * @param string   $dst     Path of the background PNG.
 * @param string   $src     Path of the PNG drawn on top.
 * @param string   $newfile Path the combined PNG is written to.
 * @param int|null $left    Distance from the background's left edge. 0 is a
 *                          valid offset; when omitted (null) the overlay is
 *                          aligned with the right edge.
 * @param int|null $top     Distance from the background's top edge. 0 is a
 *                          valid offset; when omitted (null) the overlay is
 *                          aligned with the bottom edge.
 * @return bool True when the combined image was written, false when an input
 *              could not be loaded or the output could not be saved.
 */
function superimposedPng($dst, $src, $newfile, $left = null, $top = null)
{
    $dst_im = imagecreatefrompng($dst);
    if ($dst_im === false) {
        return false;
    }

    $src_im = imagecreatefrompng($src);
    if ($src_im === false) {
        imagedestroy($dst_im);
        return false;
    }

    $src_width = imagesx($src_im);
    $src_height = imagesy($src_im);

    // Only a non-numeric value (null, '', false) selects the default position;
    // an explicit 0 places the overlay flush with the left/top edge.
    if (!is_numeric($left)) {
        $left = imagesx($dst_im) - $src_width;
    }
    if (!is_numeric($top)) {
        $top = imagesy($dst_im) - $src_height;
    }

    // Keep the alpha channel of the background in the saved file.
    imagesavealpha($dst_im, true);
    imagecopy($dst_im, $src_im, (int) $left, (int) $top, 0, 0, $src_width, $src_height);

    $saved = imagepng($dst_im, $newfile);
    imagedestroy($dst_im);
    imagedestroy($src_im);

    return $saved;
}
/**
 * Convert a PNG into a square .ico file.
 *
 * The PNG is resampled to $size x $size pixels and encoded with the
 * phpthumb_ico class from "phpthumb.ico.php"; the result is written to
 * "<$filename>.ico".
 *
 * @param string $src      Path of the source PNG.
 * @param int    $size     Edge length of the icon in pixels; must be positive.
 * @param string $filename Output name without the ".ico" suffix.
 * @return bool True when the icon file was written, false on invalid
 *              arguments, an unreadable source, a missing encoder class, or
 *              a failed write.
 */
function covertPngToIco($src, $size, $filename)
{
    $size = (int) $size;
    if ($src == '' || $size <= 0) {
        return false;
    }

    $im = imagecreatefrompng($src);
    if ($im === false) {
        return false;
    }

    $resize_im = imagecreatetruecolor($size, $size);
    if ($resize_im === false) {
        imagedestroy($im);
        return false;
    }

    imagealphablending($resize_im, false);
    imagecolortransparent($resize_im, imagecolorallocatealpha($resize_im, 0, 0, 0, 0));
    imagecopyresampled($resize_im, $im, 0, 0, 0, 0, $size, $size, imagesx($im), imagesy($im));
    imagedestroy($im);

    // include_once instead of include: including the file again on a second
    // call would try to declare the encoder class a second time.
    include_once "phpthumb.ico.php";
    if (!class_exists('phpthumb_ico')) {
        imagedestroy($resize_im);
        return false;
    }

    $icon = new phpthumb_ico();
    // Kept in a variable in case the encoder takes its argument by reference.
    $gd_image_array = array($resize_im);
    $icon_data = $icon->GD2ICOstring($gd_image_array);
    imagedestroy($resize_im);

    // Save the icon.
    return file_put_contents($filename . ".ico", $icon_data) !== false;
}
/**
 * Scale a GIF, JPEG or PNG so that it fits inside a bounding box.
 *
 * The aspect ratio is preserved and an image that already fits is copied at
 * its original size (it is never enlarged). The output is written in the same
 * format as the input.
 *
 * @param string $img         Path of the source image.
 * @param int    $w           Maximum width of the result in pixels.
 * @param int    $h           Maximum height of the result in pixels.
 * @param string $newfilename Path the result is written to (may equal $img).
 * @return string|false $newfilename on success; false when GD is missing, the
 *                      file is not a supported image, the box is not
 *                      positive, or the result could not be written.
 */
function resizeImage($img, $w, $h, $newfilename)
{
    // Check that the GD extension is loaded.
    if (!extension_loaded('gd') && !extension_loaded('gd2')) {
        trigger_error("GD is not loaded", E_USER_WARNING);
        return false;
    }

    $w = (int) $w;
    $h = (int) $h;
    if ($w <= 0 || $h <= 0) {
        return false;
    }

    // Index 0/1 hold width/height, index 2 the IMAGETYPE_* constant.
    $imgInfo = getimagesize($img);
    if ($imgInfo === false) {
        trigger_error('Unsupported filetype!', E_USER_WARNING);
        return false;
    }
    $srcWidth = $imgInfo[0];
    $srcHeight = $imgInfo[1];
    $type = $imgInfo[2];

    switch ($type) {
        case IMAGETYPE_GIF:
            $im = imagecreatefromgif($img);
            break;
        case IMAGETYPE_JPEG:
            $im = imagecreatefromjpeg($img);
            break;
        case IMAGETYPE_PNG:
            $im = imagecreatefrompng($img);
            break;
        default:
            // Stop here: there is no image resource to work with.
            trigger_error('Unsupported filetype!', E_USER_WARNING);
            return false;
    }
    if ($im === false) {
        return false;
    }

    if ($srcWidth <= $w && $srcHeight <= $h) {
        // Already small enough: keep the original dimensions.
        $nWidth = $srcWidth;
        $nHeight = $srcHeight;
    } else {
        // Use the smaller of the two ratios so that BOTH sides fit the box.
        $scale = min($w / $srcWidth, $h / $srcHeight);
        $nWidth = max(1, (int) round($srcWidth * $scale));
        $nHeight = max(1, (int) round($srcHeight * $scale));
    }

    $newImg = imagecreatetruecolor($nWidth, $nHeight);
    if ($newImg === false) {
        imagedestroy($im);
        return false;
    }

    // GIF and PNG can carry transparency: start from a fully transparent
    // canvas instead of the default black one.
    if ($type == IMAGETYPE_GIF || $type == IMAGETYPE_PNG) {
        imagealphablending($newImg, false);
        imagesavealpha($newImg, true);
        $transparent = imagecolorallocatealpha($newImg, 255, 255, 255, 127);
        imagefilledrectangle($newImg, 0, 0, $nWidth, $nHeight, $transparent);
    }

    imagecopyresampled($newImg, $im, 0, 0, 0, 0, $nWidth, $nHeight, $srcWidth, $srcHeight);

    // Write the result in the source format.
    switch ($type) {
        case IMAGETYPE_GIF:
            $written = imagegif($newImg, $newfilename);
            break;
        case IMAGETYPE_JPEG:
            $written = imagejpeg($newImg, $newfilename);
            break;
        default:
            $written = imagepng($newImg, $newfilename);
            break;
    }

    imagedestroy($im);
    imagedestroy($newImg);

    if (!$written) {
        trigger_error('Failed resize image!', E_USER_WARNING);
        return false;
    }

    return $newfilename;
}
// Usage example. The calls communicate through files on disk, so their order matters:
// "su.png" is the original (background) image.
$logo = "su.png";
// 1. Resize ok.png to a 30x30 box, writing the result back over ok.png.
resizeImage("ok.png","30","30","ok.png");
// 2. Overlay ok.png on the background and write the result back to su.png.
superimposedPng($logo, "ok.png","su.png");
// 3. Convert su.png into a 64-pixel icon written as "ok.ico".
covertPngToIco("su.png",64,"ok");
以上代码总结了 PHP 图片(image)缩放、叠加、生成 ico 的方法,主要处理 png 的情况,并对 png 图片生成后变黑、png 透明等问题进行了处理。如需处理 jpeg、gif 图片,需要把 imagecreatefrompng 修改为相应的 imagecreatefromjpeg、imagecreatefromgif 方法。
在使用过程中参考网址,以方便各位参考
png透明处理 http://www.phpweblog.net/young40/archive/2008/11/26/6124.html
png图片变黑 http://hi.baidu.com/guantong_gt/blog/item/f35bbade73a6341d48540378.html
png生成ico http://www.jb51.net/article/12458.htm
png缩放图片 http://www.akemapa.com/2008/07/10/php-gd-resize-transparent-image-png-gif/
参考推荐:
PHP 获取网页标题(title)、描述(description)、关键字(keywords)等meta信息
PHP + Selenium + WebDriver 抓取米扑科技首页
Python + Selenium2 + Chrome 爬取网页
selenium+php-webdriver实现抓取淘宝页面
PHP 路径详解 dirname,realpath,__FILE__,getcwd
PHP 文件导入 require, require_once, include, include_once 区别
版权所有: 本文系米扑博客原创、转载、摘录,或修订后发表,最后更新于 2019-12-07 18:22:43
侵权处理: 本个人博客,不盈利,若侵犯了您的作品权,请联系博主删除,莫恶意,索钱财,感谢!
转载注明: PHP抓取网站ico图标 (米扑博客)
确实你好久没有更新网站了,哈哈