#!/usr/bin/perl -w # # Fetch the first page worth of images found by Google search. # # To do: # filter by size, containing arg tokens # option to do multiple pages # make/use dir named after args # # 2007~05: Written by Steven J. DeRose. # 2009-04-13 sjd: Fix -filter, add -f and -key, test for error pages. # Typical google image search result url: # http://images.google.com/images?client=safari&rls=en&q=leighton+raphaelite # &ie=UTF-8&oe=UTF-8&um=1&sa=N&tab=wi # http://images.google.com/images?q=leighton+raphaelite # &svnum=10 # &um=1 # &hl=en # &safe=off # &client=safari # &rls=en # &start=20 # &sa=N # &ndsp=20 use strict; use Getopt::Long; my $version = "2010-09-12"; my $google = "http://images.google.com/images?q="; my $agent = "Mozilla/4.0"; my $f = ""; my $filter = 0; my $key = ""; my $makedir = 0; my $maxtime = 0; my $numpages = 1; my $quiet = 0; my $requireTokens = 0; my $safe = 0; my $startpage = 1; my $verbose = 0; #Getopt::Long::Configure ("ignore_case"); my $result = GetOptions( "q" => \$quiet, "f=s" => \$f, "filter" => \$filter, "help" => sub { showUsage(); }, "key=s" => \$key, "m=n" => \$maxtime, "mkdir" => \$makedir, "n=n" => \$numpages, "r" => \$requireTokens, "s=n" => \$startpage, "safe" => \$safe, "v+" => \$verbose, "version" => sub { warn "Version of $version, by Steven J. DeRose\n"; exit; } ); # options my @tokens = (); if ($f) { ($verbose) && warn "Got -f = '$f'.\n"; (-f $f) || die "Can't find file '$f' for -f option.\n"; open FL, $f || die "Can't open file '$f'.\n"; while (my $tline = ) { if ($tline =~ m/^\s*\#/) { next; } ($verbose) && warn "\n\n******* Starting $tline"; @tokens = split(/[\s_]+/, $tline); runit(); } } else { (scalar @ARGV) || die "No search words specified.\n"; @tokens = @ARGV; runit(); } exit; sub runit() { ($verbose) && warn "\nStarting search for tokens in " . join(" ",@tokens) . 
".\n"; for my $t (@tokens) { $t =~ s/_/ /g; } my $matchfilter = join("\\|", @tokens); $matchfilter =~ s/_/\\|/; my $cleanList = " | splitat | grep '^ $tmpfile.clean"; open F, "<$tmpfile.clean"; my $fct = 0; my $filtered = 0; while (my $l = ) { chomp $l; $l =~ s/(['"<>[]!~#|\\;?$*])/\\\1/g; # Escape special chars if ($filter && $l !~ m/$matchfilter/i) { $filtered++; next; } ($verbose) && warn " Fetching '$l'\n"; system "curl -O '$l' 2>/dev/null" || warn "Curl failed for $l\n";; (my $justname = $l) =~ s|^.*/||; if (!-e $justname) { warn "Can't see file '$justname'.\n"; } elsif (`grep -c -i ' Run repeatedly, with tokens as from each line of specified file. -filter Fetch only images that have at least one of the search-tokens. in their names (ignoring case). =item * B<-key k> Add this keyword to all searches (use with -f). =item * B<-m n> Pass -m (maxtime) option on to curl. (untested) =item * B<-mkdir> Make a directory named for the search-tokens. =item * B<-s n> Start at Google result page n (1). =item * B<-safe> Set Google safe-search flag. =item * B<-n n> Do n Google result pages (1). "; }
# ---------------------------------------------------------------------------
# NOTE(review): this copy of the script is damaged and cannot run as-is.
#
# Layout damage
#   * All line breaks were lost. The first physical line begins with '#', so
#     Perl reads the entire option-parsing and driver code on it as a comment;
#     on the second line everything after "# Escape special chars" is likewise
#     swallowed by that comment.
#   * Text that resembled markup appears to have been stripped:
#       - both "while (my ... = )" loops have no filehandle read
#         (presumably <FL> and <F> -- confirm against the original);
#       - the $cleanList string jumps from "grep '^" straight to
#         "$tmpfile.clean", so the rest of the pipeline, the declaration of
#         $tmpfile and the code that fetches the result page are missing;
#       - the backtick "grep -c -i '" runs directly into the usage text, so
#         the end of runit() and the "sub showUsage" header are missing.
#     Restore these from the original file; do not guess at them.
#
# Defects in the visible code, to fix once the text is restored
#   * open FL, $f || die ...   -- "||" binds to $f, which is already known to
#     be true, so the die can never fire. Use "or die" (and preferably a
#     three-argument open with a lexical handle and $! in the message).
#   * open F, "<$tmpfile.clean";   -- result is not checked at all.
#   * system "curl ..." || warn ...   -- same precedence problem: the command
#     string is always true, so the warning is unreachable. system() returns
#     0 on success, so the test needs the form  system(...) == 0 or warn ...
#   * $l comes from the fetched page and is interpolated into a shell command
#     line; the list form of system() would avoid shell interpretation.
#   * s/(['"<>[]!~#|\\;?$*])/\\\1/g   -- the character class closes at the
#     first ']', so this is not the single class of shell metacharacters it
#     appears intended to be; "\1" in the replacement should be "$1".
#   * join("\\|", @tokens) yields "\|", which in a Perl regex is a literal
#     pipe rather than alternation, so m/$matchfilter/i with several tokens
#     only matches the literal text "tok1|tok2". (If $matchfilter was also
#     handed to grep in the missing code, that use would need the grep form --
#     confirm before changing.)
#   * "use strict" is present but "use warnings" is not; only -w on the
#     shebang line enables warnings.
# ---------------------------------------------------------------------------