PERL   69
analyze cgi
Guest on 19th August 2022 04:24:23 PM


  1. #!/usr/bin/perl
  2.  
  3. # analyze.cgi -- analyzes Web page hits by browser, platform, time of day, day of week
  4.  
  5. # the first line of this script may have to be changed
  6. # if your system's Perl interpreter is not located in /usr/bin/perl
  7.  
  8. # Written by P. Lutus Ashland, Oregon lutusp@arachnoid.com
  9.  
  10. # some of the filter-by-browser and filter-by-platform code was derived from public sources
  11.  
  # ---- report configuration ------------------------------------------------
  # Each $show_* flag switches one table on (1) or off (0):
  #   $show_browsers  one row per distinct browser name -- a really big table!
  #   $show_hours     breakdown by hour of logon
  #   $show_days      breakdown by day of logon
  ($show_browsers, $show_hours, $show_days) = (0, 1, 1);

  # Length of the longest histogram bar, in characters.  A larger value
  # gives more detail but takes more space.
  $hist_high = 40;

  # Weekday labels for the daily table, Sunday first.
  @wday_name = qw(Sun Mon Tue Wed Thu Fri Sat);

  # ---- page preamble ---------------------------------------------------------
  # CGI header (terminated by the blank line), then the opening HTML.
  print join('',
    "content-type: text/html\n\n",
    "<html><head><title>Browser/Platform/Time Statistics</title></head>",
    "<body bgcolor=#FFFFFF text=#004000>\n",
    "<center><h1><Font Color=#0000FF>Browser/Platform/Time Statistics</Font></h1>",
    "<Hr Width=80% Align=Center>\n",
  );

  # ---- one report section per *.log file in the current directory -----------
  # The file name is left in $_ as well as passed as an argument.
  while (defined($_ = glob('*.log'))) {
    &analyze($_);
  }

  print "</center></body></html>";
  44.  
  # analyze -- tally one tab-separated hit log and print its statistics as HTML.
  #
  # Argument:
  #   the log file name.  When called without arguments the name is taken
  #   from $_, which is where this routine originally looked for it.
  #
  # Input, as read here: the first line holds tab-separated column names and
  # every following line is one hit.  Only the columns named
  # "HTTP_USER_AGENT" and "Time" are examined.  "Time" is split on a space
  # into a date (month/day/year) and a time (hour:minute:second).
  #
  # "Total Logged Hits" is the number of HTTP_USER_AGENT fields seen, so a
  # log without that column reports 0 hits and prints no tables.
  #
  # Reads the globals $show_browsers, $show_hours, $show_days, $hist_high
  # and @wday_name; calls mdy_jd() and dow() when $show_days is set.
  #
  # Output goes to the currently selected handle.  A file that cannot be
  # opened is reported on STDERR and rendered as a section with 0 hits.
  # The caller's $_ is left untouched.  Returns 1.
  sub analyze {
    my $filename = @_ ? $_[0] : $_;
    $filename = '' unless defined $filename;

    # Row order of the two summary tables.
    my @browser_labels  = ('Netscape', 'Mosaic', 'Microsoft', 'Lynx', 'WebExplorer', 'NetCruiser');
    my @platform_labels = ('Windows', 'UnixGUI', 'UnixText', 'Macintosh', 'OS/2');

    my (%browser_hits, %platform_hits);
    foreach my $label (@browser_labels)  { $browser_hits{$label}  = 0; }
    foreach my $label (@platform_labels) { $platform_hits{$label} = 0; }

    my (%agent_hits, %hit_hour, %hit_day, %weekday_of);
    my ($hits, $maxhour, $maxday) = (0, 0, 0);
    my @column;
    my $seen_header = 0;

    if (open(my $log, '<', $filename)) {
      while (my $line = <$log>) {
        $line =~ s/\r?\n\z//;   # strip the line ending only (chop ate real data)
        $line =~ s/\t /\t/g;    # remove leading spaces
        $line =~ s/  / /g;      # remove double spaces
        my @field = split(/\t/, $line);

        unless ($seen_header) {
          @column = @field;
          $seen_header = 1;
          next;
        }

        foreach my $i (0 .. $#field) {
          my $name  = defined $column[$i] ? $column[$i] : '';
          my $datum = $field[$i];

          if ($name eq 'HTTP_USER_AGENT') {
            # Browser family; MSIE also announces itself as "Mozilla".
            $browser_hits{'Netscape'}++    if $datum =~ /Mozilla/ && $datum !~ /MSIE/;
            $browser_hits{'Mosaic'}++      if $datum =~ /Mosaic/;
            $browser_hits{'Microsoft'}++   if $datum =~ /Microsoft/ || $datum =~ /MSIE/;
            $browser_hits{'Lynx'}++        if $datum =~ /Lynx/;
            $browser_hits{'WebExplorer'}++ if $datum =~ /WebExplorer/;
            $browser_hits{'NetCruiser'}++  if $datum =~ /NetCruiser/;

            # Platform, guessed from the same string.
            $platform_hits{'Windows'}++
              if $datum =~ /Windows|Microsoft|Win32|Quarterdeck|Chameleon|AIR_Mosaic|SPRY_Mosaic|Win16|WinNT|WindowsNT|Win95/;
            $platform_hits{'Macintosh'}++ if $datum =~ /Macintosh/;
            $platform_hits{'UnixGUI'}++   if $datum =~ /X11|X Window/;
            $platform_hits{'UnixText'}++  if $datum =~ /Lynx/;
            $platform_hits{'OS/2'}++      if $datum =~ /WebExplorer/;

            $agent_hits{$datum}++;
            $hits++;
          }
          elsif ($name eq 'Time') {
            my ($date, $time) = split(/ /, $datum);
            $date = '' unless defined $date;
            $time = '' unless defined $time;

            if ($show_hours) {
              my ($hour) = split(/:/, $time);
              my $slot = int(($hour || 0) / 2);   # 12 two-hour slots
              $hit_hour{$slot}++;
              $maxhour = $hit_hour{$slot} if $maxhour < $hit_hour{$slot};
            }

            if ($show_days) {
              # Convert each distinct date only once.
              unless (exists $weekday_of{$date}) {
                my ($mo, $da, $yr) = split(m{/}, $date);
                foreach my $part ($mo, $da, $yr) {
                  $part = '' unless defined $part;
                }
                $weekday_of{$date} = dow(mdy_jd("$mo/$da/$yr 00:00:00"));
              }
              my $wd = $weekday_of{$date};
              $hit_day{$wd}++;
              $maxday = $hit_day{$wd} if $maxday < $hit_day{$wd};
            }
          }
        }
      }
      close $log;
    }
    else {
      warn "analyze: cannot open '$filename': $!\n";
    }

    my $shown_name = _analyze_escape_html($filename);
    print "<center><Font Color=#800000><h2>Statistics for $shown_name</h2><P><I>Total Logged Hits: $hits</I></Font></center>";

    if ($hits > 0) {
      print "<Table width=80% cellpadding=4 align=top Colspec=\"L20 L20 L20\">";
      print "<Tr valign=top><Td align=center>";
      _analyze_print_summary('Browser', \@browser_labels, \%browser_hits, $hits);

      print "</Td><Td align=center>";
      _analyze_print_summary('Platform', \@platform_labels, \%platform_hits, $hits);

      if ($show_days && $maxday > 0) {
        print "</Td><Td align=center>";
        print "<table border cellpadding=4>";
        print "<caption><Font Size=+1><Font Color=#400040><B>Daily Hits</B></Font></Font></caption>\n";
        print "<tr><Font Color=#000080><th>Day</th><th>Data</th></Font></tr>\n";
        foreach my $day (0 .. 6) {
          printf("<tr><td>%s</td><td>%s</td></tr>\n",
            $wday_name[$day], _analyze_bar($hit_day{$day}, $maxday));
        }
        print "</table>";
      }

      if ($show_hours && $maxhour > 0) {
        print "</Td><Td align=center>";
        print "<table border cellpadding=4>";
        print "<caption><Font Size=+1><Font Color=#400040><B>Hourly Hits</B></Font></Font></caption>\n";
        print "<tr><Font Color=#000080><th>From</th><th>To</th><th>Data</th></Font></tr>\n";
        foreach my $slot (0 .. 11) {
          printf("<tr><td>%02d:00</td><td>%02d:59</td><td>%s</td></tr>\n",
            $slot * 2, $slot * 2 + 1, _analyze_bar($hit_hour{$slot}, $maxhour));
        }
        print "</table>";
      }

      print "</Td></Tr></Table>";

      if ($show_browsers) {
        print "<Table border=1 width=80% cellpadding=4 align=top Colspec=\"L20 L20 \">\n";
        print "<caption><Font Size=+1><Font Color=#400040><B>Detail by Browser Type</B></Font></Font></caption>\n";
        print "<tr><Font Color=#000080><th>Browser</th><th>Hits</th><th>%</th></Font></tr>\n";

        # Most frequent first; ties broken by name so the order is stable.
        my @agents = sort { $agent_hits{$b} <=> $agent_hits{$a} || $a cmp $b } keys %agent_hits;
        foreach my $agent (@agents) {
          # User-agent strings come from the client: escape before printing.
          printf("<tr><td>%s</td><td>%s</td><td>%.2f</td></tr>\n",
            _analyze_escape_html($agent), $agent_hits{$agent},
            100 * $agent_hits{$agent} / $hits);
        }
        print "</table>\n";
      }
    }

    print "<Hr Width=80% Align=Center>";
    return 1;
  }

  # Escape the characters that are special in HTML text and attribute values.
  sub _analyze_escape_html {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
  }

  # Build one histogram bar: "&nbsp;" followed by "|" characters, scaled so
  # that a count equal to $max is $hist_high characters long (rounded up).
  sub _analyze_bar {
    my ($count, $max) = @_;
    my $width = ($count || 0) * (($hist_high || 0) / $max);
    my $bars  = int($width);
    $bars++ if $bars < $width;
    return '&nbsp;' . ('|' x $bars);
  }

  # Print one summary table (hits and percentage per label, plus an
  # "Accounted for" total row).  $labels fixes the row order, $count_of maps
  # label => hits, $hits is the non-zero total used for the percentages.
  sub _analyze_print_summary {
    my ($title, $labels, $count_of, $hits) = @_;
    my $row = "<tr align=right><td>%s</td><td align=right>%d</td><td><strong>%.2f</strong></td></tr>";
    my $accounted = 0;

    print "<table border cellpadding=4>";
    print "<caption><Font Size=+1><Font Color=#400040><B>$title summary</B></Font></Font></caption>";
    print "<tr><Font Color=#000080><th>$title</th><th>Hits</th><th>%</th></Font></tr>";
    foreach my $label (@$labels) {
      printf($row, $label, $count_of->{$label}, 100 * $count_of->{$label} / $hits);
      $accounted += $count_of->{$label};
    }
    printf($row, 'Accounted for', $accounted, 100 * $accounted / $hits);
    print "</table>";
    return;
  }
  245.  
  # mdy_jd -- convert a "mm/dd/yyyy hh:mm:ss" stamp into a Julian Day number.
  #
  # Argument:
  #   one string, date and time separated by whitespace; the date is
  #   month/day/year and the time is hour:minute:second.  Missing parts
  #   count as 0.
  # Returns:
  #   the Julian Day as a floating-point number (midnight yields x.5).
  #
  # The year is used exactly as given, so a two-digit year means a year in
  # the first century.  Dates that sort before 1582-10-15 get no Gregorian
  # correction and are therefore treated as Julian-calendar dates.
  #
  # All working variables are lexical: this routine used to overwrite the
  # globals $date, $time, $a and $b, which its caller and sort() rely on.
  sub mdy_jd {
    my ($stamp) = @_;
    $stamp = '' unless defined $stamp;

    my ($date, $time)         = split(' ', $stamp);
    my ($month, $day, $year)  = split(m{/}, defined $date ? $date : '');
    my ($hour, $minute, $sec) = split(/:/, defined $time ? $time : '');
    foreach my $part ($month, $day, $year, $hour, $minute, $sec) {
      $part = 0 unless defined $part && length $part;
    }

    # January and February are counted as months 13 and 14 of the previous year.
    if ($month < 3) {
      $year  -= 1;
      $month += 12;
    }

    # yyyymmdd-style key, used only to pick the calendar.
    my $calendar_key = $day + ($month * 100) + ($year * 10000);
    my $correction   = 0;                      # Julian calendar
    if ($calendar_key >= 15821015) {           # Gregorian
      my $century = int($year / 100);
      $correction = 2 - $century + int($century / 4);
    }

    my $jd = int(365.25 * ($year + 4716)) + int(30.6001 * ($month + 1))
           + $day + $correction - 1524.5;
    $jd += ($hour / 24) + ($minute / 1440) + ($sec / 86400);
    return $jd;
  }
  268.  
  # dow -- day of the week for a Julian Day number.
  #
  # Argument: a Julian Day, possibly fractional.
  # Returns:  0 .. 6, where 0 is Sunday and 6 is Saturday.
  #
  # The fractional day is dropped before the modulus; int() makes explicit
  # the truncation that Perl's % operator applies to its operands.  The
  # result is returned through a lexical instead of the global $q, which
  # the report code also uses for its histogram bars.
  sub dow {
    my ($jd) = @_;
    $jd = 0 unless defined $jd;
    return int($jd - 5) % 7;
  }

Raw Paste

Login or Register to edit or fork this paste. It's free.