#!/usr/bin/perl -w

# Entry point: crawl every file named on the command line, starting each
# crawl at depth 0.
for my $start (@ARGV) {
    spider(0, $start);

    # Debugging aid: list the links of one file without recursing.
    #my @qq = scnfle($start);
    #print @qq;
}

# scnfle(FILE) -- scan an HTML file for hyperlink targets.
#
# Reads FILE line by line and returns the href values found, in the order
# they appear.  Only double-quoted values (href="...") that sit entirely on
# one line are recognised.  The attribute name is matched case-insensitively
# and every link on a line is reported, not just the last one.
#
# Arguments: $_[0]  path of the file to scan.
# Returns:   list of href strings (empty list when no link is found).
# Dies:      with "Couldn't open FILE!" when FILE cannot be opened.
sub scnfle {
    my ($topfile) = @_;
    my @listofhrefs;

    # Three-argument open with a lexical handle: a name such as ">x" or
    # "cmd |" is treated as a plain file name instead of a mode or a pipe,
    # and the handle is private to this call.
    open(my $hf, '<', $topfile) || die "Couldn't open $topfile!\n";

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$hf>) {
        # [^"]+ stops at the closing quote, so attributes that follow the
        # href are not swallowed, empty values are skipped, and a final line
        # without a trailing newline loses no characters.
        while ($line =~ /\bhref\s*=\s*"([^"]+)"/gi) {
            push(@listofhrefs, $1);
        }
    }
    close($hf);

    return @listofhrefs;
}

# spider(LEVEL, FILE [, MAXLEVEL]) -- print the link tree reachable from FILE.
#
# Prints a "spider: level N, FILE" header, then for every link that
# scnfle() finds in FILE prints "FILE: -> LINK" and recurses into LINK one
# level deeper.  Links whose text equals FILE itself are skipped.
#
# Arguments: $_[0]  current depth (callers start at 0).
#            $_[1]  path of the file to scan.
#            $_[2]  optional depth limit; recursion stops once the depth
#                   reaches it.  Defaults to 2.
# Returns:   nothing useful.
# Dies:      whenever scnfle() dies, i.e. when a file cannot be opened.
sub spider {
    my ($thislevel, $thisfile, $maxlevel) = @_;
    $maxlevel = 2 unless defined $maxlevel;

    return if $thislevel >= $maxlevel;

    print "spider: level $thislevel, $thisfile\n";

    # The link list must be lexical: with a shared package array the
    # recursive call below would overwrite the list this loop is walking.
    my @hrefs = scnfle($thisfile);
    #print "spider: $thisfile: @hrefs\n";
    foreach my $href (@hrefs) {
        next if $href eq $thisfile;    # do not follow a page's link to itself
        print "$thisfile: -> $href\n";
        spider($thislevel + 1, $href, $maxlevel);
    }
    return;
}