Crawling images from web with PHP

Tool

Basic function

https://github.com/votinhthuong/crawler_image_php

Modification

1. Get the true image name

Some sites serve images with a query string (or other extra characters) appended to the file name. We need to remove it.
Add the function get_image_name($img_name) to the simple_html_dom.php file.

/**
 * Remove anything appended after the image extension (e.g. a query string).
 *
 * The name is cut right after the first ".<ext>" it contains, where <ext> is
 * one of the known image extensions, matched case-insensitively. The
 * original letter case of the name is kept.
 *
 * Examples: "a.jpg?x=1" => "a.jpg", "photo.PNG!large" => "photo.PNG".
 *
 * @param string $img_name Image file name, possibly with trailing characters.
 * @return string The name truncated after its extension, or the unchanged
 *                name when no known extension is found.
 */
function get_image_name($img_name){
	$exts = Array('jpg','jpeg','png','gif','ico','webp');
	$img_name = (string)$img_name;
	$cut = false;
	foreach($exts as $ext){
		// Look for ".ext" rather than "ext" so that letters inside the base
		// name (e.g. "my_jpg_pics.png") are not mistaken for the extension.
		$pos = stripos($img_name, '.'.$ext);
		if($pos === false) continue;
		$end = $pos + 1 + strlen($ext);
		// Keep the earliest match: an extension that only shows up later,
		// inside the appended part ("icon.png?ref=a.jpg"), must not win.
		if($cut === false || $end < $cut) $cut = $end;
	}
	// No known extension: return the name untouched instead of an empty string.
	if($cut === false) return $img_name;
	return substr($img_name, 0, $cut);
}

Add one more line of code to get the image name in index.php:

  $img_name = get_image_name($img_name);

2. Zip images folder

General zip operations
Open a zip file and add files to it
// Open test_new.zip (ZipArchive::CREATE is passed to open()) and put a few entries in it.
$zip = new ZipArchive();
if (true === $zip->open('test_new.zip', ZipArchive::CREATE)) {
    // These two files are added without an explicit entry name.
    foreach (array('test.txt', 'test.pdf') as $path) {
        $zip->addFile($path);
    }

    // random.txt is stored in the archive as newfile.txt.
    $zip->addFile('random.txt', 'newfile.txt');

    // new.txt is created directly from the given string.
    $zip->addFromString('new.txt', 'text to be added to the new.txt file');

    // Everything has been queued: close the zip file.
    $zip->close();
}
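Add files under a folder inside a zip file (note: ZipArchive::CREATE updates an existing zip file; to overwrite it, open it with ZipArchive::CREATE | ZipArchive::OVERWRITE)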
// Open test_folder.zip and store every entry under demo_folder/ inside it.
$zip = new ZipArchive();
if (true === $zip->open('test_folder.zip', ZipArchive::CREATE)) {
    // Source file on disk => entry name inside the archive.
    // random.txt is renamed to newfile.txt on the way in.
    $entries = array(
        'text.txt'   => 'demo_folder/test.txt',
        'test.pdf'   => 'demo_folder/test.pdf',
        'random.txt' => 'demo_folder/newfile.txt',
    );
    foreach ($entries as $source => $target) {
        $zip->addFile($source, $target);
    }

    // demo_folder/new.txt is created directly from the given string.
    $zip->addFromString('demo_folder/new.txt', 'text to be added to the new.txt file');

    // Everything has been queued: close the zip file.
    $zip->close();
}
Create a new zip file and move the files to be in different folders
// Open test_folder_change.zip and place each file in its own folder inside it.
$zip = new ZipArchive();
$status = $zip->open('test_folder_change.zip', ZipArchive::CREATE);
if ($status === true) {
    // The second argument is the entry name, i.e. the path inside the archive.
    $zip->addFile('text.txt', 'demo_folder/test.txt');
    $zip->addFile('test.pdf', 'demo_folder1/test.pdf');

    // Everything has been queued: close the zip file.
    $zip->close();
}
Create a zip file with all files from a directory
// Build test_dir.zip from the regular files found directly inside demo_folder.
$zip = new ZipArchive;
// OVERWRITE alone only replaces an archive that already exists; combined with
// CREATE the archive is also created when it is missing, so the first run works.
if ($zip->open('test_dir.zip', ZipArchive::CREATE | ZipArchive::OVERWRITE) === TRUE) {
    if ($handle = opendir('demo_folder')) {
        // Add all files inside the directory
        while (false !== ($entry = readdir($handle))) {
            // Skip the "." and ".." pseudo-entries and any sub-directory.
            if ($entry != "." && $entry != ".." && !is_dir('demo_folder/' . $entry)) {
                $zip->addFile('demo_folder/' . $entry);
            }
        }
        closedir($handle);
    }
    // All files are added, so close the zip file.
    $zip->close();
}
Add multiple files and directories to a zip file
// Build test_files_dirs.zip from the files of three directories plus one extra file.
$zip = new ZipArchive;
// OVERWRITE alone only replaces an archive that already exists; combined with
// CREATE the archive is also created when it is missing, so the first run works.
if ($zip->open('test_files_dirs.zip', ZipArchive::CREATE | ZipArchive::OVERWRITE) === TRUE){
    // Directories to add, processed in this order (trailing slash included).
    $directories = array(
        'demo_folder/directory1/',
        'demo_folder/directory2/',
        'demo_folder/directory3/',
    );

    foreach ($directories as $directory) {
        if ($handle = opendir($directory)) {
            while (false !== ($entry = readdir($handle))) {
                // Skip the "." and ".." pseudo-entries and any sub-directory:
                // addFile() only handles regular files.
                if ($entry != "." && $entry != ".." && !is_dir($directory . $entry)) {
                    $zip->addFile($directory . $entry);
                }
            }
            closedir($handle);
        }
    }

    // Add more files
    $zip->addFile('demo_folder/index.txt');

    // All files are added, so close the zip file.
    $zip->close();
}
Zip the image folder
// Zip the files of the image folder $zipfile into "$zipfile.zip", then remove
// the folder. $zipfile and deleteDir() are expected to be defined by the
// surrounding script.
$zip = new ZipArchive;
if ($zip->open($zipfile.'.zip', ZipArchive::CREATE) === TRUE) {
    // Becomes true only when the folder could be read and every file was accepted.
    $all_added = false;
    if ($handle = opendir($zipfile.'/')) {
        $all_added = true;
        // Add all files inside the directory
        while (false !== ($entry = readdir($handle))) {
            if ($entry != "." && $entry != ".." && !is_dir($zipfile.'/' . $entry)) {
                if (!$zip->addFile($zipfile.'/' . $entry)) {
                    $all_added = false;
                }
            }
        }
        closedir($handle);
    }
    // close() saves the changes to the archive, so the source files must not
    // be deleted before it has returned.
    $saved = $zip->close();
    // Delete the folder only when the archive was saved with every file in it;
    // otherwise the images would be lost without a usable copy.
    if ($saved && $all_added) {
        deleteDir($zipfile);   //delete the folder after it has been zipped
    }
}

Applied site

211 Tools

Reference

 401 total views

Author: Albert

Leave a Reply