1 |
adcroft |
1.1 |
#!/usr/bin/perl -w |
2 |
|
|
|
3 |
|
|
# Driver: crawl every file named on the command line, starting at depth 0.
for my $start_file (@ARGV) {
    spider(0, $start_file);

    # Debugging aid (disabled): dump the links found directly in this file.
    # my @links = scnfle($start_file);
    # print @links;
}
8 |
|
|
|
9 |
|
|
# scnfle(FILE)
#
# Scan FILE line by line and return the list of href= targets it contains,
# in order of appearance.  Both quoted (href="x") and unquoted (href=x)
# values are recognised; the surrounding double quotes are not part of the
# returned value, and empty targets are skipped.
#
# Dies with "Couldn't open FILE!" if FILE cannot be opened for reading.
sub scnfle {
    my ($topfile) = @_;
    my @listofhrefs;

    # Three-argument open with a lexical handle: FILE usually comes from a
    # link found in another page, so it must never be interpreted as an
    # open mode or a pipe the way two-argument open would.
    open(my $hf, '<', $topfile) || die "Couldn't open $topfile!\n";

    while (my $line = <$hf>) {
        # /g walks over every link on the line instead of keeping only the
        # last one.  The value stops at the closing quote (or, if unquoted,
        # at whitespace or '>'), so neither trailing attributes nor the
        # line terminator can leak into it.
        while ($line =~ m{href=(?:"([^"]*)"|([^"\s>]+))}g) {
            my $target = defined $1 ? $1 : $2;
            push @listofhrefs, $target if length $target;
        }
    }

    close($hf);

    return @listofhrefs;
}
25 |
|
|
|
26 |
|
|
# spider(LEVEL, FILE [, MAXLEVEL])
#
# Recursively follow the links found in FILE.
#
#   LEVEL    - current recursion depth (callers start at 0).
#   FILE     - file to scan with scnfle().
#   MAXLEVEL - optional; recursion stops once LEVEL reaches this value.
#              Defaults to 2.
#
# Prints one "spider: level N, FILE" line for each file visited and one
# "FILE: -> LINK" line for each link followed.  A link equal to FILE itself
# is skipped.  Returns nothing.  Dies (via scnfle) if a file cannot be
# opened.
sub spider {
    my ($thislevel, $thisfile, $maxlevel) = @_;
    $maxlevel = 2 unless defined $maxlevel;

    return if $thislevel >= $maxlevel;

    print "spider: level $thislevel, $thisfile\n";

    # The link list must be lexical: a shared package array would be
    # overwritten by the recursive call below while this loop is still
    # iterating over it.
    my @hrefs = scnfle($thisfile);
    #print "spider: $thisfile: @hrefs\n";

    foreach my $href (@hrefs) {
        next if $href eq $thisfile;    # do not follow a self-reference

        print "$thisfile: -> $href\n";
        spider($thislevel + 1, $href, $maxlevel);
    }

    return;
}