source: liacs/ccs/op3/robot.pl@ 4

Last change on this file since 4 was 2, checked in by Rick van der Zwet, 15 years ago

Initial import of data of old repository ('data') worth keeping (e.g. tracking
means of URL access statistics)

  • Property svn:executable set to *
File size: 656 bytes
RevLine 
#!/usr/bin/env perl
#
# robot.pl - report how many of the distinct user-agent strings found in
# an access log look like robots (crawlers, spiders, link checkers, ...).
#
# Output is a single line: "robot/others user-agent: <robots>/<total> <pct>".
#
use strict;
use warnings;

# Neither $file nor percent() is defined in this script, so both have to
# come from common.pl.
# NOTE(review): presumably $file is the path of the access log and
# percent() formats a ratio -- confirm against common.pl.
our $file;

# Perl >= 5.26 no longer has "." in @INC, so a bare do "common.pl" cannot
# find a file sitting in the current directory; name the path explicitly.
do './common.pl';
die "robot.pl: error while loading common.pl: $@" if $@;
die "robot.pl: common.pl did not define percent()\n"
    unless defined &percent;
die "robot.pl: common.pl did not set \$file\n"
    unless defined $file;

# Distinct user-agent strings seen in the log (the value is a placeholder
# that is never read; only the keys matter).
my %user_agent;

open(my $log, '<', $file)
    or die "robot.pl: cannot open '$file': $!\n";
while (my $line = <$log>) {
    # Skip lines that do not have the expected layout. Without this check
    # the capture variables hold stale or undefined data, which could add
    # a bogus empty agent and inflate the total.
    next
        unless $line =~ /\S*\ \S*\ \S*\ \S*\ \S*\ (\S*)\ \S*\ \S*\ \S* \S*\ ".*"\ (.*)$/;

    # Capture 2 is everything after the last quoted field: the agent.
    my $agent = $2;
    $user_agent{$agent} = "UN" unless exists $user_agent{$agent};
}
close($log);

# Count the agents whose name contains a typical robot keyword. grep in
# scalar context yields 0 (not undef) when nothing matches, so the report
# always shows a number.
# To inspect the remaining agents manually, print the keys that do NOT
# match this pattern.
my $robot = grep {
    /(?:bot|spider|slurp|search|crawler|checker|downloader|worm)/i
} keys %user_agent;

my $total = scalar(keys %user_agent);
print "robot/others user-agent: $robot/$total ",
    percent($robot, $total), "\n";
exit(0);
Note: See TracBrowser for help on using the repository browser.