#!/usr/bin/perl -w
#
# Keep calling the main/master machine to get categories, get hook words
# for them, save, and go round again.
#
# Each pass of the loop:
#   1. asks the master (via curl) for the next category name;
#   2. runs ./wordRanks for that category, writing '<name>.hooks' locally;
#   3. copies that file to the master with scp and removes the local copy.
# The loop ends when the master answers "#EOF".
#
# sjd, 2009-09.

use strict;
use warnings;

my $verbose    = 0;
my $master     = "domU-12-31-39-00-50-74.compute-1.internal";
my $requestURI = "http://$master/cgi-bin/nextCategory.pl";
my $wikiData   = "/mnt/wikiData/wikiSplit";
my $runCount   = 0;
my $thresh     = 0.3;

# Seconds to wait before asking again when the master gives us nothing,
# so an unreachable master does not turn this loop into a busy spin.
my $retryDelay = 5;

# This should tick about every 7 seconds.
my $cmdBase = "./wordRanks -tick 500000 -quiet -force -nPrefix 2"
    . " -pagedir $wikiData -rank -threshold $thresh -unicode"
    . " -wikiFrequencyLimit 20000";

(-x "./wordRanks") || die "No executable at ./wordRanks.\n";

# Quote a string for safe use as a single word in a /bin/sh command line.
# Embedded single quotes are closed, escaped, and reopened.
sub shellQuote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

while (1) {
    # --fail makes curl print nothing on an HTTP error instead of handing
    # us the server's error page as if it were a category name.
    my $catName = `curl --silent --fail '$requestURI'`;
    $catName = "" unless defined $catName;
    chomp $catName;

    if ($catName eq "#EOF") {
        warn "Hit EOF.\n";
        last;    # fall through to the summary below
    }
    if ($catName eq "") {
        warn "Empty category name from master.\n";
        sleep $retryDelay;
        next;
    }

    # Build a file name from the category: no quotes, whitespace, or path
    # separators, and no leading dot (hidden file).
    (my $outfile = $catName) =~ s{['\s/]}{_}g;
    $outfile =~ s/^\./_/;

    # The explicit "./" keeps scp from reading a ':' in the name as a
    # host separator, and keeps a leading '-' from looking like an option.
    my $hooksPath = "./$outfile.hooks";

    ($verbose) && warn "Running category \#$runCount: '$catName'\n";

    # The category name comes from the network, so it is shell-quoted
    # rather than dropped between bare single quotes.
    my $cmd = "$cmdBase -category " . shellQuote($catName)
        . " >" . shellQuote($hooksPath);
    system($cmd) == 0
        or warn "wordRanks failed for '$catName' (status $?)\n";

    # List-form system: no shell is involved, so no quoting is needed.
    my @scpCmd = ("scp", $hooksPath, "root\@$master:/hookwords/");
    ($verbose) && warn "Running: @scpCmd\n";
    # system returns 0 on success, so compare explicitly.
    system(@scpCmd) == 0
        or warn "Couldn't scp '$outfile.hooks'\n";

    unlink($hooksPath)
        or warn "Couldn't remove '$outfile.hooks': $!\n";

    $runCount++;
    warn "\n";
}

warn "Done, handled $runCount categories.\n";
exit;