#!/usr/bin/perl
# analyze.cgi -- analyzes Web page hits by browser, platform, time of day, day of week
# the first line of this script may have to be changed
# if your system's Perl interpreter is not located in /usr/bin/perl
# Written by P. Lutus Ashland, Oregon lutusp@arachnoid.com
# some of the filter-by-browser and filter-by-platform code was derived from public sources
# ---- Report options (1 = show, 0 = hide) -----------------------------------
# Per-browser detail table: one row per distinct user-agent string, so it
# can become a really big table.
$show_browsers = 0;
# Histogram of hits by hour of logon.
$show_hours = 1;
# Histogram of hits by day of logon.
$show_days = 1;
# Length, in characters, of the longest histogram bar; a larger value gives
# more detail but takes more space.
$hist_high = 40;
# Weekday labels, indexed 0 (Sunday) through 6 (Saturday).
@wday_name = qw(Sun Mon Tue Wed Thu Fri Sat);

# HTTP header, then the opening of the HTML page.
print "content-type: text/html\n\n",
      "<html><head><title>Browser/Platform/Time Statistics</title></head>",
      "<body bgcolor=#FFFFFF text=#004000>\n",
      "<center><h1><Font Color=#0000FF>Browser/Platform/Time Statistics</Font></h1>",
      "<Hr Width=80% Align=Center>\n";

# One report section per *.log file in the current directory.  The file
# name is deliberately left in $_ as well as being passed as an argument.
for (glob('*.log')) {
    analyze($_);
}

print "</center></body></html>";
# analyze -- summarize one tab-separated log file and print the report as HTML.
#
# Argument: the name of the log file.  When called without an argument the
# name is taken from $_ instead, so callers that only set $_ keep working.
#
# Input format, as implied by the parsing below: the first line holds the
# tab-separated column names and every later line holds one hit.  Only the
# columns named "HTTP_USER_AGENT" and "Time" are examined; a "Time" value is
# split on a single space into a date (m/d/y) and a time (h:m:s).
#
# Reads the globals $show_browsers, $show_hours, $show_days, $hist_high and
# @wday_name, and calls mdy_jd() and dow() for the weekday computation.
# Everything is written to STDOUT; nothing useful is returned.
sub analyze {
    my $filename  = @_ ? $_[0] : $_;
    my $safe_name = _html_escape($filename);

    # Browser and platform classification rules, in display order.  A single
    # user-agent string may satisfy several rules, so the "Accounted for"
    # totals can exceed the number of hits.
    my @browser_tests = (
        [ 'Netscape',    sub { $_[0] =~ /Mozilla/ && $_[0] !~ /MSIE/ } ],  # MSIE also claims "Mozilla"
        [ 'Mosaic',      sub { $_[0] =~ /Mosaic/ } ],
        [ 'Microsoft',   sub { $_[0] =~ /Microsoft|MSIE/ } ],
        [ 'Lynx',        sub { $_[0] =~ /Lynx/ } ],
        [ 'WebExplorer', sub { $_[0] =~ /WebExplorer/ } ],
        [ 'NetCruiser',  sub { $_[0] =~ /NetCruiser/ } ],
    );
    my @platform_tests = (
        [ 'Windows',   sub { $_[0] =~ /Windows|Microsoft|Win32|Quarterdeck|Chameleon|AIR_Mosaic|SPRY_Mosaic|Win16|WinNT|WindowsNT|Win95/ } ],
        [ 'UnixGUI',   sub { $_[0] =~ /X11|X Window/ } ],
        [ 'UnixText',  sub { $_[0] =~ /Lynx/ } ],
        [ 'Macintosh', sub { $_[0] =~ /Macintosh/ } ],
        [ 'OS/2',      sub { $_[0] =~ /WebExplorer/ } ],
    );

    my (%browser_hits, %platform_hits);   # rule label      -> matching hits
    my %browserlist;                      # raw user agent  -> hits
    my %hit_hour;                         # two-hour slot   -> hits
    my %hit_day;                          # weekday (0=Sun) -> hits
    my ($maxhour, $maxday) = (0, 0);      # tallest histogram column so far
    my ($old_date, $old_jd);              # cache: consecutive hits usually share a date
    my @name;                             # column names from the header line
    my $header_seen = 0;

    # The log must actually be opened before it can be read.  On failure,
    # report the problem inside the page rather than dying mid-response.
    my $log;
    unless (open($log, '<', $filename)) {
        print "<center><Font Color=#800000><h2>Statistics for $safe_name</h2>",
              "<P><I>Cannot open log file: ", _html_escape("$!"), "</I></Font></center>";
        print "<Hr Width=80% Align=Center>";
        return;
    }

    while (my $line = <$log>) {
        $line =~ s/\r?\n\z//;      # drop the line ending so the last column compares cleanly
        $line =~ s/\t +/\t/g;      # remove leading spaces of each field
        $line =~ s/ {2,}/ /g;      # collapse runs of spaces to a single space
        my @field = split(/\t/, $line);

        if (!$header_seen) {
            @name        = @field;
            $header_seen = 1;
            next;
        }

        for my $i (0 .. $#field) {
            my $column = defined $name[$i] ? $name[$i] : '';
            my $datum  = $field[$i];

            if ($column eq 'HTTP_USER_AGENT') {
                for my $rule (@browser_tests) {
                    $browser_hits{ $rule->[0] }++ if $rule->[1]->($datum);
                }
                for my $rule (@platform_tests) {
                    $platform_hits{ $rule->[0] }++ if $rule->[1]->($datum);
                }
                $browserlist{$datum}++;
            }
            elsif ($column eq 'Time') {
                my ($date, $time) = split(/ /, $datum);

                if ($show_hours && defined $time) {
                    my ($hour) = split(/:/, $time);
                    my $slot = int(($hour || 0) / 2);    # 12 two-hour slots
                    $hit_hour{$slot}++;
                    $maxhour = $hit_hour{$slot} if $maxhour < $hit_hour{$slot};
                }

                if ($show_days && defined $date) {
                    my ($mo, $da, $yr) = split(m{/}, $date);
                    if (defined $yr) {
                        my $jd = (defined $old_date && $old_date eq $date)
                               ? $old_jd
                               : mdy_jd("$mo/$da/$yr 00:00:00");
                        my $wd = dow($jd);
                        $hit_day{$wd}++;
                        $maxday = $hit_day{$wd} if $maxday < $hit_day{$wd};
                        $old_date = $date;
                        $old_jd   = $jd;
                    }
                }
            }
        }
    }
    close($log);

    # A "hit" is a data row that supplied a user-agent value.
    my $hits = 0;
    for my $count (values %browserlist) {
        $hits += $count;
    }

    print "<center><Font Color=#800000><h2>Statistics for $safe_name</h2><P><I>Total Logged Hits: $hits</I></Font></center>";

    if ($hits > 0) {
        print "<Table width=80% cellpadding=4 align=top Colspec=\"L20 L20 L20\">";
        print "<Tr valign=top><Td align=center>";
        print _summary_table('Browser summary', 'Browser',
            [ map { [ $_->[0], $browser_hits{ $_->[0] } || 0 ] } @browser_tests ], $hits);
        print "</Td><Td align=center>";
        print _summary_table('Platform summary', 'Platform',
            [ map { [ $_->[0], $platform_hits{ $_->[0] } || 0 ] } @platform_tests ], $hits);

        if ($show_days && $maxday > 0) {
            print "</Td><Td align=center>";
            print "<table border cellpadding=4>";
            print "<caption><Font Size=+1><Font Color=#400040><B>Daily Hits</B></Font></Font></caption>\n";
            print "<tr><Font Color=#000080><th>Day</th><th>Data</th></Font></tr>\n";
            for my $wd (0 .. 6) {
                printf("<tr><td>%s</td><td>%s</td></tr>\n",
                    $wday_name[$wd], _histogram_bar($hit_day{$wd}, $maxday));
            }
            print "</table>";
        }

        if ($show_hours && $maxhour > 0) {
            print "</Td><Td align=center>";
            print "<table border cellpadding=4>";
            print "<caption><Font Size=+1><Font Color=#400040><B>Hourly Hits</B></Font></Font></caption>\n";
            print "<tr><Font Color=#000080><th>From</th><th>To</th><th>Data</th></Font></tr>\n";
            for my $slot (0 .. 11) {
                my $from = 2 * $slot;      # slot N covers hours 2N:00 .. 2N+1:59
                my $to   = $from + 1;
                printf("<tr><td>%d:00</td><td>%d:59</td><td>%s</td></tr>\n",
                    $from, $to, _histogram_bar($hit_hour{$slot}, $maxhour));
            }
            print "</table>";
        }
        print "</Td></Tr></Table>";

        if ($show_browsers) {
            print "<Table border=1 width=80% cellpadding=4 align=top Colspec=\"L20 L20 \">\n";
            print "<caption><Font Size=+1><Font Color=#400040><B>Detail by Browser Type</B></Font></Font></caption>\n";
            print "<tr><Font Color=#000080><th>Browser</th><th>Hits</th><th>%</th></Font></tr>\n";
            # Most frequent first; ties broken by name so the order is stable.
            my @agents = sort { $browserlist{$b} <=> $browserlist{$a} || $a cmp $b }
                         keys %browserlist;
            for my $agent (@agents) {
                my $count = $browserlist{$agent};
                # The user-agent string is client-supplied: escape it.
                printf("<tr><td>%s</td><td>%s</td><td>%.2f</td></tr>\n",
                    _html_escape($agent), $count, 100 * $count / $hits);
            }
            print "</Table>\n";
        }
    }
    print "<Hr Width=80% Align=Center>";
    return;
}

# _html_escape -- make a string safe to place in HTML text or attribute values.
sub _html_escape {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# _histogram_bar -- a space followed by one '|' per unit of $count, scaled so
# that $max maps to $hist_high characters; a fractional unit is rounded up.
sub _histogram_bar {
    my ($count, $max) = @_;
    $count = 0 unless defined $count;
    my $length = $count * ($hist_high / $max);
    my $bars   = int($length);
    $bars++ if $bars < $length;
    return ' ' . ('|' x $bars);
}

# _summary_table -- HTML for one complete summary table.  $rows is a reference
# to a list of [label, count] pairs; a final "Accounted for" row sums them.
# $hits must be non-zero (it is the percentage denominator).
sub _summary_table {
    my ($title, $heading, $rows, $hits) = @_;
    my $row_format = '<tr align=right><td>%s</td><td align=right>%d</td>'
                   . '<td><strong>%.2f</strong></td></tr>';
    my $html = "<table border cellpadding=4>"
             . "<caption><Font Size=+1><Font Color=#400040><B>$title</B></Font></Font></caption>"
             . "<tr><Font Color=#000080><th>$heading</th><th>Hits</th><th>%</th></Font></tr>";
    my $total = 0;
    for my $row (@$rows) {
        my ($label, $count) = @$row;
        $total += $count;
        $html  .= sprintf($row_format, $label, $count, 100 * $count / $hits);
    }
    $html .= sprintf($row_format, 'Accounted for', $total, 100 * $total / $hits);
    return $html . "</table>";
}
# mdy_jd -- convert a date/time string to a Julian Date (fractional day number).
#
# Argument: "mm/dd/yyyy hh:mm:ss".  The time part may be omitted (treated as
# 00:00:00).  A year written with one or two digits is expanded with the
# usual pivot: 69-99 become 1969-1999 and 00-68 become 2000-2068.
#
# Returns the Julian Date as a number.  Julian Dates begin at noon, so a
# midnight time yields a value ending in .5.  Dates before 15 October 1582
# are interpreted in the Julian calendar, later dates in the Gregorian one.
sub mdy_jd {
    my ($date, $time)        = split(' ', defined $_[0] ? $_[0] : '');
    my ($month, $day, $year) = split(m{/}, defined $date ? $date : '');
    my ($hour, $min, $sec)   = split(/:/,  defined $time ? $time : '');

    # Expand a two-digit year before any arithmetic is done on it.
    if (defined $year && $year =~ /^\d{1,2}$/) {
        $year += ($year < 69) ? 2000 : 1900;
    }

    # Missing components count as zero.
    for my $part ($month, $day, $year, $hour, $min, $sec) {
        $part = 0 unless defined $part && length $part;
    }

    # Decide the calendar from the civil date (as yyyymmdd) before the
    # month/year shift below.
    my $is_gregorian = ($year * 10000 + $month * 100 + $day) >= 15821015;

    # January and February are counted as months 13 and 14 of the
    # previous year, which puts the leap day at the end of the "year".
    if ($month < 3) {
        $year  -= 1;
        $month += 12;
    }

    # Gregorian correction: derived from the century number, not from the
    # full date value.
    my $correction = 0;
    if ($is_gregorian) {
        my $century = int($year / 100);
        $correction = 2 - $century + int($century / 4);
    }

    my $jd = int(365.25 * ($year + 4716))
           + int(30.6001 * ($month + 1))
           + $day + $correction - 1524.5;
    $jd += ($hour / 24) + ($min / 1440) + ($sec / 86400);
    return $jd;
}
# dow -- day of the week for a Julian Date.
#
# Argument: a Julian Date, possibly fractional.
# Returns an integer 0..6 where 0 is Sunday and 6 is Saturday.
#
# Julian Dates roll over at noon, so the value is shifted by 1.5 days and
# floored; this makes every instant of a civil day map to the same weekday.
sub dow {
    my ($jd) = @_;
    my $shifted    = $jd + 1.5;
    my $day_number = int($shifted);
    $day_number-- if $day_number > $shifted;   # int() truncates toward zero; turn it into a floor
    return $day_number % 7;
}