[prev in list] [next in list] [prev in thread] [next in thread]
List: kc-kde
Subject: Re: Fwd: Re: [kc-kde] gnome summary
From: Chris Howells <chris () chrishowells ! co ! uk>
Date: 2002-03-19 18:45:23
[Download RAW message or body]
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On Tuesday 19 March 2002 8:56 am, Aaron J. Seigo wrote:
> both have essentially the same information.
Yes. But with the mailing list you can download the lot in one go and go
offline for the analysis. With cvs log you must be connected to the net, and
you're dependent on the speed of the cvs server at that particular time.
> no problems. ... i'll take this on next week and stop deleting kde-cvs
> emails ;-)
Cool. I have a 'newscan' script here. It's attached. I don't know whether
it's any use.
Maybe it could be adapted. It's designed to be run over a leafnode news spool
thing. kde-cvs is available over nntp, so maybe...
If you want any more info about how it works, please let me know.
> on our work projects we do this. it's a lot of fun ... well, until you get
> into debugging / tweaking and then you realize you just spent the last 2
> hours to only commit a +3 / -2 ... ack!
Hah.
- --
Cheers, Chris Howells -- chris@chrishowells.co.uk, howells@kde.org
Web: http://chrishowells.co.uk, PGP key: http://chrishowells.co.uk/pgp.txt
KDE: http://www.koffice.org, http://edu.kde.org, http://usability.kde.org
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.0.6 (GNU/Linux)
Comment: For info see http://www.gnupg.org
iD8DBQE8l4dDF8Iu1zN5WiwRAj0hAJ9sNOmEqJ2LGbxa1FS+povbrIC08ACePNMM
Tw9SmUxwHcvjmAxRQfylmhM=
=iSO/
-----END PGP SIGNATURE-----
["newscan.pl" (text/x-perl)]
#! /usr/bin/perl -w
## newscan.pl - scan one newsgroup's spool directory with News::Scan and
## print the resulting scan object on standard output as Data::Dumper code.
##
## Usage: newscan.pl <newsgroup>
use strict;
use Data::Dumper 'DumperX';
use News::Scan;

## NOTE(review): these strings are single-quoted, so the backslash in front
## of each "@" is kept literally (Perl only unescapes \\ and \' there).
## Whether News::Scan expects plain addresses or patterns for these options
## is not visible from this script -- confirm against its documentation.
my %aliases = (
    'goku\@uklinux.net' => 'eddietheman\@penguinpowered.com',
);
my @exclude = (
    'see\@reply.to',
    'not\@home.com',
    'test\@test.test',
);

## The newsgroup is the last command-line argument; refuse to run without
## one instead of scanning the spool root with an undefined group name.
my $newsgroup = pop @ARGV;
die "Usage: $0 <newsgroup>\n"
    unless defined $newsgroup && length $newsgroup;

## Map the dotted group name onto its spool directory,
## e.g. "uk.rec.scouting" becomes "/var/spool/news/uk/rec/scouting".
(my $newsdir = $newsgroup) =~ tr{.}{/};
$newsdir = "/var/spool/news/" . $newsdir;

my $scan = News::Scan->new(
    Group   => $newsgroup,
    Spool   => $newsdir,
    Period  => 7,
    Aliases => \%aliases,
    Exclude => \@exclude,
    From    => 'spool',
);

die "$0: Failed to create News::Scan object!\n" unless defined $scan;
die "$0: Error: " . $scan->error . "\n" if $scan->error;

$scan->scan;
die "$0: Error: " . $scan->error . "\n" if $scan->error;

$Data::Dumper::Indent = 0;    ## minimal whitespace
$Data::Dumper::Purity = 1;    ## get correct topology at any cost

## Parenthesised so DumperX can never be parsed as a filehandle name.
print DumperX($scan);
exit;
["gennews.sh" (application/x-shellscript)]
#!/bin/bash
# gennews.sh - build the statistics article for one newsgroup and copy it
# into the news spool's outgoing directory.
#
# Pipeline: newscan.pl dumps the scan for $GROUP, news-stats formats the dump
# it is given as "-", and the two sed passes turn every "@" into " at munged".
# newscan.pl and news-stats are looked up in the current directory.

# Report a failure of any pipeline stage, not only of the last sed.
set -o pipefail

GROUP="uk.rec.scouting"
PERL="/usr/bin/perl5.005"
DATE=$(date +"%d.%b.%Y")
LOCATION="/var/spool/news/out.going/$GROUP.stats"
REPORT="$GROUP-$DATE"

# Stop before touching the spool if the report could not be generated, so an
# empty or truncated file is never copied into place.
if ! "$PERL" newscan.pl "$GROUP" | "$PERL" news-stats - \
        | sed 's/\@/\@munged/g' | sed 's/\@/ at /g' > "$REPORT"; then
    echo "$0: failed to generate statistics for $GROUP" >&2
    exit 1
fi

# NOTE(review): the "-c - news" argument order is unusual and is kept as is.
# Switching to a login shell ("su - news") would presumably change the working
# directory and break the relative source path -- confirm before changing.
su -c - news "cp $REPORT $LOCATION"
echo "Stats are" "$REPORT"
["news-stats" (application/x-perl)]
#! /usr/bin/perl -w
use strict;
use POSIX;
use News::Scan;
## subs
## Convert a byte count to kilobytes, formatted with one decimal place.
sub in_kb {
    my ($bytes) = @_;
    my $kilobytes = $bytes / 1024;
    return sprintf("%.1f", $kilobytes);
}
## Insert a comma between each group of three digits in the leading integer
## part of the argument (an optional minus sign is preserved).
sub commify {
    my ($text) = @_;
    while ($text =~ s/^(-?\d+)(\d\d\d)/$1,$2/) {
        ## each pass splits off the right-most unseparated group of three
    }
    return $text;
}
## Format a number with a fixed number of decimal places.
## Arguments: the number of decimal places, then the value to format.
sub places {
    my ($acc, $val) = @_;
    my $format = "%.${acc}f";
    return sprintf($format, $val);
}
## Return the median of a list of numbers.  With an odd number of values the
## middle value is returned unchanged; with an even number the mean of the two
## middle values is returned, formatted to one decimal place.
sub median {
    my @sorted = sort { $a <=> $b } @_;
    my $count  = scalar @sorted;
    my $upper  = $count / 2;    ## a fractional index is truncated by Perl

    return $sorted[$upper] if $count % 2 == 1;

    my $lower = $count / 2 - 1;
    return places(1, ($sorted[$upper] + $sorted[$lower]) / 2);
}
## Return the most frequent value(s) of a list together with their frequency.
##
## Returns a two-element list: (description, frequency).  The description is
## the single mode, "A and B" for two tied modes, or "A, B, and C" for more.
## Tied modes are listed in ascending numeric order so the output is stable
## from run to run.  Returns an empty list when called with no values.
sub mode {
    my @values = @_;
    return unless @values;

    my %count_of;
    $count_of{$_}++ for @values;

    ## Highest number of occurrences of any value.
    my $freq = 0;
    for my $count (values %count_of) {
        $freq = $count if $count > $freq;
    }

    ## Every value that reaches that frequency, in a deterministic order.
    my @modes = sort { $a <=> $b }
                grep { $count_of{$_} == $freq }
                keys %count_of;

    return ($modes[0], $freq) if @modes == 1;
    return (join(" and ", @modes), $freq) if @modes == 2;

    my $last = pop @modes;
    return (join(", ", @modes) . ", and $last", $freq);
}
## Population standard deviation, formatted to one decimal place.
##
## Arguments: the data values followed by their mean as the LAST argument
## (this is how avgs calls it).  Returns "0.0" when no data values are given.
sub stdev {
    my @values = @_;

    ## The mean is passed after the data, so take it off the end; it must
    ## not be counted as a data point.
    my $avg = pop @values;
    return "0.0" unless @values;

    my $sum = 0;
    $sum += ($_ - $avg) ** 2 for @values;

    ## Same "%.1f" formatting that places(1, ...) produces.
    return sprintf("%.1f", sqrt($sum / scalar @values));
}
## Print the article headers and the introductory notes of the report.
## Takes the scan object; reads its name, period, quote_re, earliest and
## latest values.  The period boundaries are formatted from gmtime, the
## "week ending" stamp from the current local time.
sub print_header {
    my ($scan) = @_;

    my $stamp_format = "%d %b %Y %H:%M:%S";

    my $group    = $scan->name;
    my $period   = $scan->period;
    my $quote_re = $scan->quote_re;
    my $earliest = strftime($stamp_format, gmtime($scan->earliest));
    my $latest   = strftime($stamp_format, gmtime($scan->latest));
    my $today    = localtime(time());

    print <<EOF;
From: Chris Howells' Computer <chrish\@gmx.co.uk>
Newsgroups: $group
Subject: Statistics for $group for week ending $today
User-Agent: NewsScan - E-Mail me if you want a copy
Following is a summary of articles spanning a $period day period,
beginning at $earliest GMT and ending at
$latest GMT.
Notes
=====
- A line in the body of a post is considered to be original if it
does *not* match the regular expression /$quote_re/.
- All text after the last cut line (/^-- \$/) in the body is
considered to be the author's signature.
- The scanner prefers the Reply-To: header over the From: header
in determining the "real" e-mail address and name.
- Original Content Rating is the ratio of the original content volume
to the total body volume.
- Please send all comments to Chris Howells
(chrish at gmx dot co dot uk).
EOF
}
## Print the "Excluded Posters" section: one entry per line followed by a
## blank line.  Prints nothing when the scan's exclude list is empty.
sub excluded {
    my ($scan) = @_;

    my @excludes = @{ $scan->excludes };
    return unless @excludes;

    print <<EOBanner;
Excluded Posters
================
EOBanner
    print join("\n", @excludes, "", "");
}
## Print the "Totals" section: poster, article and thread counts, the volume
## broken down into headers, bodies, original text and signatures, and the
## overall Original Content Rating (original volume / body volume).
sub totals {
    my ($scan) = @_;

    my $posters      = $scan->posters;
    my $num_posters  = scalar keys %$posters;
    my $num_sigs     = $scan->signatures;
    my $num_articles = $scan->articles;
    my $threads      = $scan->threads;
    my $num_threads  = scalar keys %$threads;

    my $total_volume = in_kb($scan->volume);
    my $hdr_volume   = in_kb($scan->header_volume);
    my $hdr_lines    = commify($scan->header_lines);
    my $body_volume  = in_kb($scan->body_volume);
    my $body_lines   = commify($scan->body_lines);
    my $orig_volume  = in_kb($scan->orig_volume);
    my $orig_lines   = commify($scan->orig_lines);
    my $sig_volume   = in_kb($scan->sig_volume);
    my $sig_lines    = commify($scan->sig_lines);

    ## A scan with no body text would otherwise die with a division by
    ## zero; report a rating of 0 in that case.
    my $body_bytes = $scan->body_volume;
    my $ocr = sprintf "%.3f",
        ($body_bytes ? $scan->orig_volume / $body_bytes : 0);

    print <<EOTotals;
Totals
======
Posters: $num_posters
Articles: $num_articles ($num_sigs with cutlined signatures)
Threads: $num_threads
Volume generated: $total_volume kb
- headers: $hdr_volume kb ($hdr_lines lines)
- bodies: $body_volume kb ($body_lines lines)
- original: $orig_volume kb ($orig_lines lines)
- signatures: $sig_volume kb ($sig_lines lines)
Original Content Rating: $ocr
EOTotals
}
## Print the "Averages" section: mean, median, mode and standard deviation of
## posts per poster and posts per thread, plus the average message size
## broken down into header, body, original text and signature.
##
## Prints nothing when the scan holds no articles, posters or threads, since
## every figure in this section is a division by one of those counts.
sub avgs {
    my ($scan) = @_;

    my $posters      = $scan->posters;
    my $num_posters  = scalar keys %$posters;
    my $threads      = $scan->threads;
    my $num_threads  = scalar keys %$threads;
    my $num_articles = $scan->articles;

    return unless $num_articles && $num_posters && $num_threads;

    ## posts per poster
    my $posts_avg       = places(1, $num_articles / $num_posters);
    my @posts_by_poster = map { $_->articles } values %$posters;
    my $pmed            = median(@posts_by_poster);
    my ($pmode, $pmode_score) = mode(@posts_by_poster);
    my $psd = stdev(@posts_by_poster, $num_articles / $num_posters);

    ## posts per thread
    my $thr_avg         = places(1, $num_articles / $num_threads);
    my @posts_by_thread = map { $_->articles } values %$threads;
    my $tmed            = median(@posts_by_thread);
    my ($tmode, $tmode_score) = mode(@posts_by_thread);
    my $tsd = stdev(@posts_by_thread, $num_articles / $num_threads);

    ## per-article sizes
    my $msg        = places(1, $scan->volume / $num_articles);
    my $hdr        = places(1, $scan->header_volume / $num_articles);
    my $hdr_lines  = places(1, $scan->header_lines / $num_articles);
    my $body       = places(1, $scan->body_volume / $num_articles);
    my $body_lines = places(1, $scan->body_lines / $num_articles);
    my $orig       = places(1, $scan->orig_volume / $num_articles);
    my $orig_lines = places(1, $scan->orig_lines / $num_articles);
    my $sig        = places(1, $scan->sig_volume / $num_articles);
    my $sig_lines  = places(1, $scan->sig_lines / $num_articles);

    ## The mode may be a joined string such as "1 and 2", so it is compared
    ## as a string: a numeric == would warn and wrongly pick the singular.
    print <<EOAvgs;
Averages
========
Posts per poster: $posts_avg
median: $pmed post@{[$pmed == 1 ? "" : "s"]}
mode: $pmode post@{[$pmode eq "1" ? "" : "s"]} - $pmode_score poster@{[$pmode_score == 1 ? "" : "s"]}
s: $psd post@{[$psd == 1 ? "" : "s"]}
Posts per thread: $thr_avg
median: $tmed post@{[$tmed == 1 ? "" : "s"]}
mode: $tmode post@{[$tmode eq "1" ? "" : "s"]} - $tmode_score thread@{[$tmode_score == 1 ? "" : "s"]}
s: $tsd post@{[$tsd == 1 ? "" : "s"]}
Message size: $msg bytes
- header: $hdr bytes ($hdr_lines lines)
- body: $body bytes ($body_lines lines)
- original: $orig bytes ($orig_lines lines)
- signature: $sig bytes ($sig_lines lines)
EOAvgs
}
## Print the four poster rankings: top ten by number of posts, top ten by
## volume, and the ten highest and ten lowest Original Content Ratings among
## posters with at least five posts.
##
## Each ranking is cut to at most ten entries; when fewer are available only
## those are printed (a fixed [0 .. 9] slice would pad the list with undef
## and the method calls below would die).
sub top_posters {
    my ($scan) = @_;
    my $posters = $scan->posters;
    my @top;
    my $top_total;

    ## Volumes column shared by the first two tables.
    my $volume_columns = sub {
        my ($poster) = @_;
        return sprintf "%5.1f (%5.1f/%5.1f/%5.1f)",
            $poster->volume / 1024,
            $poster->header_volume / 1024,
            $poster->body_volume / 1024,
            $poster->orig_volume / 1024;
    };

    ## Original volume / body volume; 0 when there is no body text at all.
    my $ocr_of = sub {
        my ($poster) = @_;
        my $body = $poster->body_volume;
        return $body ? $poster->orig_volume / $body : 0;
    };

    ## by posts
    print <<EOBanner;
Top 10 Posters by Number of Posts
=================================
(kb) (kb) (kb) (kb)
Posts Volume ( hdr/ body/ orig) Address
----- -------------------------- -------
EOBanner
    @top = map  { $_->[0] }
           sort { $b->[1] <=> $a->[1] }
           map  { [ $_, $_->articles ] }
           values %$posters;
    splice @top, 10 if @top > 10;

    $top_total = 0;
    for my $poster (@top) {
        printf "%5d %26s %s\n", $poster->articles,
            $volume_columns->($poster), $poster->attrib;
        $top_total += $poster->articles;
    }
    my $all_articles = $scan->articles;
    printf "\nThese posters accounted for %.1f%% of all articles.\n",
        $all_articles ? 100 * $top_total / $all_articles : 0;

    ## by volume
    print <<EOBanner;
Top 10 Posters by Volume
========================
(kb) (kb) (kb) (kb)
Volume ( hdr/ body/ orig) Posts Address
-------------------------- ----- -------
EOBanner
    @top = map  { $_->[0] }
           sort { $b->[1] <=> $a->[1] }
           map  { [ $_, $_->volume ] }
           values %$posters;
    splice @top, 10 if @top > 10;

    $top_total = 0;
    for my $poster (@top) {
        printf "%26s %5d %s\n", $volume_columns->($poster),
            $poster->articles, $poster->attrib;
        $top_total += $poster->volume;
    }
    my $all_volume = $scan->volume;
    printf "\nThese posters accounted for %.1f%% of the total volume.\n",
        $all_volume ? 100 * $top_total / $all_volume : 0;

    ## Only posters with at least five posts take part in the OCR tables.
    ## Each entry is [ poster, rating ].
    my @rated = map  { [ $_, $ocr_of->($_) ] }
                grep { $_->articles >= 5 }
                values %$posters;

    ## top OCR
    print <<EOBanner;
Top 10 Posters by OCR (minimum of five posts)
==============================================
(kb) (kb)
OCR orig / body Posts Address
----- -------------- ----- -------
EOBanner
    @top = sort { $b->[1] <=> $a->[1] } @rated;
    splice @top, 10 if @top > 10;
    for my $entry (@top) {
        printf "%.3f (%5.1f /%5.1f) %5d %s\n",
            $entry->[1], $entry->[0]->orig_volume / 1024,
            $entry->[0]->body_volume / 1024,
            $entry->[0]->articles, $entry->[0]->attrib;
    }

    ## bottom OCR: the ten lowest ratings, printed highest first
    print <<EOBanner;
Bottom 10 Posters by OCR (minimum of five posts)
=================================================
(kb) (kb)
OCR orig / body Posts Address
----- -------------- ----- -------
EOBanner
    @top = sort { $a->[1] <=> $b->[1] } @rated;
    splice @top, 10 if @top > 10;
    for my $entry (reverse @top) {
        printf "%.3f (%5.1f /%5.1f) %5d %s\n",
            $entry->[1], $entry->[0]->orig_volume / 1024,
            $entry->[0]->body_volume / 1024,
            $entry->[0]->articles, $entry->[0]->attrib;
    }
    print "\n";
}
## Print the four thread rankings: top ten by number of posts, top ten by
## volume, and the ten highest and ten lowest Original Content Ratings among
## threads with at least three posts.
##
## Each ranking is cut to at most ten entries; when fewer are available only
## those are printed (a fixed [0 .. 9] slice would pad the list with undef
## and the method calls below would die).
sub top_threads {
    my ($scan) = @_;
    my $threads = $scan->threads;
    my @top;

    ## Original volume / body volume; 0 when there is no body text at all.
    my $ocr_of = sub {
        my ($thread) = @_;
        my $body = $thread->body_volume;
        return $body ? $thread->orig_volume / $body : 0;
    };

    ## by posts
    print <<EOBanner;
Top 10 Threads by Number of Posts
=================================
Posts Subject
----- -------
EOBanner
    @top = map  { $_->[0] }
           sort { $b->[1] <=> $a->[1] }
           map  { [ $_, $_->articles ] }
           values %$threads;
    splice @top, 10 if @top > 10;
    for my $thread (@top) {
        printf "%5d %s\n", $thread->articles, $thread->subject;
    }

    ## by volume
    print <<EOBanner;
Top 10 Threads by Volume
========================
(kb) (kb) (kb) (kb)
Volume ( hdr/ body/ orig) Posts Subject
-------------------------- ----- -------
EOBanner
    @top = map  { $_->[0] }
           sort { $b->[1] <=> $a->[1] }
           map  { [ $_, $_->volume ] }
           values %$threads;
    splice @top, 10 if @top > 10;
    for my $thread (@top) {
        my $vol = sprintf "%5.1f (%5.1f/%5.1f/%5.1f)",
            $thread->volume / 1024,
            $thread->header_volume / 1024,
            $thread->body_volume / 1024,
            $thread->orig_volume / 1024;
        printf "%26s %5d %s\n", $vol, $thread->articles,
            $thread->subject;
    }

    ## Only threads with at least three posts take part in the OCR tables.
    ## Each entry is [ thread, rating ].
    my @rated = map  { [ $_, $ocr_of->($_) ] }
                grep { $_->articles >= 3 }
                values %$threads;

    ## top OCR
    print <<EOBanner;
Top 10 Threads by OCR (minimum of three posts)
==============================================
(kb) (kb)
OCR orig / body Posts Subject
----- -------------- ----- -------
EOBanner
    @top = sort { $b->[1] <=> $a->[1] } @rated;
    splice @top, 10 if @top > 10;
    for my $entry (@top) {
        printf "%.3f (%5.1f/ %5.1f) %5d %s\n",
            $entry->[1], $entry->[0]->orig_volume / 1024,
            $entry->[0]->body_volume / 1024,
            $entry->[0]->articles, $entry->[0]->subject;
    }

    ## bottom OCR: the ten lowest ratings, printed highest first
    print <<EOBanner;
Bottom 10 Threads by OCR (minimum of three posts)
=================================================
(kb) (kb)
OCR orig / body Posts Subject
----- -------------- ----- -------
EOBanner
    @top = sort { $a->[1] <=> $b->[1] } @rated;
    splice @top, 10 if @top > 10;
    for my $entry (reverse @top) {
        printf "%.3f (%5.1f /%5.1f) %5d %s\n",
            $entry->[1], $entry->[0]->orig_volume / 1024,
            $entry->[0]->body_volume / 1024,
            $entry->[0]->articles, $entry->[0]->subject;
    }
    print "\n";
}
## Print the crosspost rankings: the ten newsgroups most often crossposted
## to, and the ten posters with the highest crosspost counts.  Returns
## without printing anything when the scan reports no crosspost data.
sub top_xposts {
    my ($scan) = @_;

    my $xposts = $scan->crossposts or return;
    my $posters = $scan->posters;

    print <<EOBanner;
Top 10 Targets for Crossposts
=============================
Articles Newsgroup
-------- ---------
EOBanner
    ## Entries are [ newsgroup, count ], highest count first.
    my @targets = sort { $b->[1] <=> $a->[1] }
                  map  { [ $_, $xposts->{$_} ] }
                  keys %$xposts;
    for my $entry (splice @targets, 0, 10) {
        printf "%8d %s\n", $entry->[1], $entry->[0];
    }

    print <<EOBanner;
Top 10 Crossposters
===================
Articles Address
-------- -------
EOBanner
    ## Entries are [ poster, count ], highest count first.
    my @crossposters = sort { $b->[1] <=> $a->[1] }
                       map  { [ $_, $_->crossposts ] }
                       values %$posters;
    for my $entry (splice @crossposters, 0, 10) {
        printf "%8d %s\n", $entry->[1], $entry->[0]->attrib;
    }
}
## Print every statistics section of the report, in order, for the given
## scan object.
sub print_stats {
    my ($scan) = @_;

    my @sections = (
        \&excluded,
        \&totals,
        \&avgs,
        \&top_posters,
        \&top_threads,
        \&top_xposts,
    );
    for my $section (@sections) {
        $section->($scan);
    }
}
## main
## Usage: news-stats <dumpfile>
## The dump file holds the Data::Dumper output of a scan object; the file
## name "-" reads the dump from standard input.
my $dump = shift;
die "Usage: $0 <dumpfile>\n" unless defined $dump && length $dump;

my $scan;
{
    my $VAR1;    ## assigned by the Data::Dumper output when it is eval'ed

    my $data;
    if ($dump eq '-') {
        ## Keep "-" meaning standard input, as two-argument open did.
        local $/ = undef;
        $data = <STDIN>;
    }
    else {
        ## Three-argument open: the name is always treated as a plain
        ## file, never as a mode prefix or a command to run.
        open my $fh, '<', $dump or die "$0: failed open $dump: $!\n";
        local $/ = undef;
        $data = <$fh>;
        close $fh;
    }
    $data = '' unless defined $data;

    ## XXX: this executes the dump as Perl code -- only feed it dumps
    ## produced by a trusted scanner run.
    my $result = eval $data;
    die "$0: Error evaluating dumpfile: $@\n" if $@;

    ## Prefer $VAR1: a dump may end in extra fix-up statements, in which
    ## case the value of the eval is not the object itself.
    $scan = defined $VAR1 ? $VAR1 : $result;
    die "$0: $dump does not contain a dumped scan object\n"
        unless ref $scan;
}
print_header($scan);
print_stats($scan);
_______________________________________________
kc-kde mailing list
kc-kde@mail.kde.org
http://mail.kde.org/mailman/listinfo/kc-kde
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic