#!/usr/bin/perl -w
#
# Send a summary of control activity.
#
# Copyright 2003-2004, 2008-2009 Russ Allbery <eagle@eyrie.org>
#
# SPDX-License-Identifier: MIT

##############################################################################
# Site configuration
##############################################################################

# Path to the active newsgroup database.
our $ACTIVE = '/srv/control/active.db';

# Default address to send mail to.
our $ADDRESS = 'usenet-config@isc.org';

# Directory in which to find log files that record the actions taken.
our $LOGBASE = '/srv/control/logs';

# Default newsgroup to post to.
our $NEWSGROUP = 'news.lists.misc';

# The address from which the reports are mailed.
our $SENDER = 'eagle@eyrie.org';

##############################################################################
# Modules and declarations
##############################################################################

require 5.006;

use strict;

use Date::Parse qw(str2time);
use DB_File ();
use Fcntl qw(LOCK_SH);
use Getopt::Long qw(GetOptions);
use Net::NNTP ();
use POSIX qw(strftime);
use Text::Template ();

##############################################################################
# Data gathering
##############################################################################

# Given a time interval, build a list of all log file names to be inspected to
# find all log messages in that interval.
sub build_log_names {
    my ($start, $end) = @_;
    die "$0: bad date range: $start to $end\n" if $start > $end;

    # Logs are only broken down by month, so set the day of the month to
    # something that won't cause problems for transitions between months.
    my @current = gmtime $start;
    my @end = gmtime $end;
    $current[3] = 15;

    # Build the log file names.  First increment up to the ending year, and
    # then increment up to the ending month.
    my @logs;
    while ($current[5] < $end[5]) {
        push (@logs, strftime ('log.%Y-%m', @current));
        $current[4]++;
        if ($current[4] == 12) {
            $current[4] = 0;
            $current[5]++;
        }
    }
    while ($current[4] <= $end[4]) {
        push (@logs, strftime ('log.%Y-%m', @current));
        $current[4]++;
    }
    return @logs;
}

# Scan the appropriate log files and find all actions performed in that
# interval.  Takes the start date and the end date (as seconds since epoch)
# and returns all of those actions as a list, without parsing them.
sub scan_log {
    my ($start, $end) = @_;
    my @logs = build_log_names ($start, $end);
    my @actions;
    for my $log (@logs) {
        if (-f "$LOGBASE/$log") {
            open (LOG, "$LOGBASE/$log")
                or die "$0: cannot open $LOGBASE/$log: $!\n";
            local $_;
            while (<LOG>) {
                next unless /^(\d+-\d+-\d+ \d+:\d+:\d+) \[\d+\] (ACTION: .*)/;
                my ($date, $action) = ($1, $2);
                my $time = str2time ($date, 'UTC');
                next if ($time < $start);
                last if ($time > $end);
                push (@actions, "$date $action");
            }
        }
    }
    return @actions;
}

# Takes in a newsgroup name and returns the right number of tabs after it for
# an entry in a newsgroups file.
sub tabs {
    my $newsgroup = shift;
    my $extra = int ((23 - length $newsgroup) / 8);
    $extra = $extra > 0 ? $extra : 0;
    return ("\t" x (1 + $extra));
}

# Given an exclusion pattern for groups and a list of actions, generate a
# textual report of the changes and return it as a string.  Uses the database
# to get descriptions for the affected groups.
sub report {
    my ($exclude, @actions) = @_;
    if ($exclude) {
        @actions = grep { (split ' ')[4] !~ /$exclude/ } @actions;
        return unless @actions;
    }

    # Sort the actions out into their own lists.  Use hash tables so that we
    # don't report the same action on the same group twice.
    my (%newgroup, %rmgroup, %changedesc, %changegroup);
    for (@actions) {
        my ($action, $group) = (split ' ')[3,4];
        if    ($action eq 'newgroup')    { $newgroup{$group}    = 1 }
        elsif ($action eq 'rmgroup')     { $rmgroup{$group}     = 1 }
        elsif ($action eq 'changedesc')  { $changedesc{$group}  = 1 }
        elsif ($action eq 'changegroup') { $changegroup{$group} = 1 }
        else { warn "$0: cannot parse action $action\n" }
    }

    # Open and lock the database.
    my %db;
    open (LOCK, "+> $ACTIVE.lock") or die "$0: cannot open $ACTIVE.lock: $!";
    flock (LOCK, LOCK_SH) or die "$0: cannot lock $ACTIVE.lock: $!";
    tie (%db, 'DB_File', $ACTIVE) or die "$0: cannot tie $ACTIVE: $!";

    # Trim out changes that have been reversed or are no longer applicable
    # and sort the groups.
    my @newgroup    = sort grep {  defined $db{$_} } keys %newgroup;
    my @changegroup = sort grep {  defined $db{$_} } keys %changegroup;
    my @changedesc  = sort grep {  defined $db{$_} } keys %changedesc;
    my @rmgroup     = sort grep { !defined $db{$_} } keys %rmgroup;

    # Generate the report.
    my $report = '';
    if (@newgroup) {
        $report .= "Newsgroups added:\n";
        for (@newgroup) {
            my ($mode, $desc) = split (' ', $db{$_}, 2);
            $report .= $_ . tabs ($_) . "$desc\n";
        }
    }
    if (@rmgroup) {
        $report .= "\n" if $report;
        $report .= "Newsgroups removed:\n" . join ("\n", @rmgroup, '');
    }
    if (@changegroup) {
        $report .= "\n" if $report;
        $report .= "Newsgroups whose status was changed:\n";
        for (@changegroup) {
            my ($mode, $desc) = split (' ', $db{$_}, 2);
            $report .= $_ . tabs ($_) . "$desc\n";
        }
    }
    if (@changedesc) {
        $report .= "\n" if $report;
        $report .= "Changed descriptions:\n";
        for (@changedesc) {
            my ($mode, $desc) = split (' ', $db{$_}, 2);
            $report .= $_ . tabs ($_) . "$desc\n";
        }
    }
    if (@actions) {
        $report .= "\n" if $report;
        $report .= "Raw action log:\n" . join ("\n", @actions, '');
    }

    # Close the database and return.
    untie %db;
    close LOCK;
    return $report;
}

##############################################################################
# Date handling
##############################################################################

# Given a timestamp (or using the current time if no argument is given),
# returns the timestamp for the previous day, handling daylight savings time.
sub yesterday {
    my $now  = defined $_[0] ? $_[0] : time;
    my $then = $now - 60 * 60 * 24;
    my $ndst = (localtime $now)[8] > 0;
    my $tdst = (localtime $then)[8] > 0;
    return $then - ($tdst - $ndst) * 60 * 60;
}

# Given a timestamp (or using the current time if no argument is given),
# returns the timestamp for the next day, handling daylight savings time.
sub tomorrow {
    my $now  = defined $_[0] ? $_[0] : time;
    my $then = $now + 60 * 60 * 24;
    my $ndst = (localtime $now)[8] > 0;
    my $tdst = (localtime $then)[8] > 0;
    return $then - ($tdst - $ndst) * 60 * 60;
}

# Given a flag saying whether to do a weekly report and then zero, one, or two
# arguments, return the start and end timestamps.  If no arguments are given,
# generate a report for the past day or week.  If one argument is given,
# generate a report for the one day (or week) period starting at that time
# (since this will normally just be a date, which represents midnight on that
# day).  If two arguments are given, those are the start and end points.
sub parse_date {
    my ($weekly, $start, $end) = @_;
    if (not defined $start) {
        my $now = time;
        if ($weekly) {
            $start = $now;
            for (1..7) {
                $start = yesterday ($start);
            }
            return ($start, $now);
        } else {
            return (yesterday ($now), $now);
        }
    } elsif (not defined $end) {
        $start = str2time ($start, 'UTC');
        if ($weekly) {
            $end = $start;
            for (1..7) {
                $end = tomorrow ($end);
            }
            return ($start, $end);
        } else {
            return ($start, tomorrow ($start));
        }
    } else {
        return (str2time ($start), str2time ($end));
    }
}

##############################################################################
# Reports
##############################################################################

# Send an e-mail report.  Takes the text report, the start and end times, the
# e-mail address to mail the report to, a template (if any), and a flag saying
# whether to just print the output to standard output.
sub mail {
    my ($report, $start, $end, $address, $template, $nomail) = @_;
    $start = strftime ('%Y-%m-%d', gmtime $start);
    $end   = strftime ('%Y-%m-%d', gmtime $end);
    my $mail;
    if ($template) {
        my $form = Text::Template->new (TYPE => 'FILE', SOURCE => $template);
        my $variables = { start   => $start,
                          end     => $end,
                          address => $address,
                          sender  => $SENDER,
                          report  => $report };
        $mail = $form->fill_in (HASH => $variables);
        unless (defined $mail) {
            die "$0: failed to fill in template: $Text::Template::ERROR\n";
        }
    } else {
        $mail  = "From: Automated Log Scan <$SENDER>\n";
        $mail .= "To: $address\n";
        $mail .= "Subject: Newsgroup changes ($start to $end)\n";
        $mail .= "\n";
        $mail .= $report;
    }
    if ($nomail) {
        open (MAIL, '>&STDOUT') or die "$0: cannot dup stdout: $!\n";
    } else {
        my ($sendmail) = grep { -x $_ }
            qw(/usr/sbin/sendmail /usr/lib/sendmail);
        $sendmail ||= '/usr/lib/sendmail';
        open (MAIL, "| $sendmail -f $SENDER -t -oi -oem")
            or die "$0: cannot open a pipe to sendmail: $!\n";
    }
    print MAIL $mail;
    close MAIL;
    die "$0: sendmail exited with status ", ($? >> 8), "\n" if ($? != 0);
}

# Post a report.  Takes the text report, the start and end times, the
# newsgroup or newsgroups to post the report to, a template (if any), and a
# flag saying whether to just print the output to standard output.
sub post {
    my ($report, $start, $end, $group, $template, $nopost) = @_;
    $start = strftime ('%Y-%m-%d', gmtime $start);
    $end   = strftime ('%Y-%m-%d', gmtime $end);
    my $post;
    if ($template) {
        my $form = Text::Template->new (TYPE => 'FILE', SOURCE => $template);
        unless (defined $form) {
            die "$0: cannot load template $template: $Text::Template::ERROR\n";
        }
        my $variables = { start     => $start,
                          end       => $end,
                          newsgroup => $group,
                          sender    => $SENDER,
                          report    => $report };
        $post = $form->fill_in (HASH => $variables);
        unless (defined $post) {
            die "$0: failed to fill in template: $Text::Template::ERROR\n";
        }
    } else {
        $post  = "From: Automated Log Scan <$SENDER>\n";
        $post .= "Newsgroups: $group\n";
        $post .= "Subject: Newsgroup changes ($start to $end)\n";
        $post .= "Approved: $SENDER\n";
        $post .= "\n";
        $post .= $report;
    }
    if ($nopost) {
        print $post;
    } else {
        my $nntp = Net::NNTP->new
            or die "Cannot connect to news server\n";
        unless ($nntp->post ($post)) {
            die "$0: can't post: ", $nntp->code, ' ', $nntp->message, "\n";
        }
        $nntp->quit;
    }
}

##############################################################################
# Implementation
##############################################################################

# Trim extraneous garbage from the path.
my $fullpath = $0;
$0 =~ s%.*/%%;

# Parse command-line options.
my ($address, $exclude, $newsgroup, $help, $nomail, $post, $raw, $template,
    $weekly, $version);
Getopt::Long::Configure ('bundling');
GetOptions ('address|a=s'          => \$address,
            'exclude|e=s'          => \$exclude,
            'group|g'              => \$newsgroup,
            'help|h'               => \$help,
            'dry-run|just-print|n' => \$nomail,
            'post|p'               => \$post,
            'raw|r'                => \$raw,
            'template|t=s'         => \$template,
            'weekly|w'             => \$weekly) or exit 1;
if ($help) {
    print "Feeding myself to perldoc, please wait....\n";
    exec ('perldoc', '-t', $fullpath);
}
die "$0: too many arguments given\n" if (@ARGV > 2);
$address   ||= $ADDRESS;
$newsgroup ||= $NEWSGROUP;

# Figure out the start and end dates for which we're parsing logs.
my ($start, $end) = parse_date ($weekly, @ARGV);

# Get the actions that were taken in that timeframe.
my @actions = scan_log ($start, $end);

# If there's anything left, send the summary mail or post the message.
if (@actions) {
    my $report;
    if ($raw) {
        $report = join ("\n", @actions, '');
    } else {
        $report = report ($exclude, @actions);
        exit unless $report;
    }
    if ($post) {
        post ($report, $start, $end, $newsgroup, $template, $nomail);
    } else {
        mail ($report, $start, $end, $address, $template, $nomail);
    }
}

##############################################################################
# Documentation
##############################################################################

=head1 NAME

control-summary - Send a summary report of control activity

=head1 SYNOPSIS

control-summary [B<-hnprw>] [B<-a> I<address>] [B<-g> I<newsgroup>]
[B<-t> I<template>] [[I<start>] I<date>]

=head1 DESCRIPTION

B<control-summary> prints out, mails, or posts a summary of activity in an
active newsgroup database maintained by B<process-control> and
B<update-control>, taken from their log files.

If no times are given on the command-line, the activity of the past day is
summarized.  If one time is given, the activity for that day is summarized.
If two times are given, the activity between those two times is summarized.
Times may be given in any format that can be parsed by Date::Parse.

Several options are set at the start of this script, including the default
e-mail address from which reports are sent and the default newsgroup to
which to post.  These options should be changed to fit your local
installation before using this script.

=head1 OPTIONS

=over 4

=item B<-a> I<address>, B<--address>=I<address>

Send the message to I<address> rather than the default of
usenet-config@isc.org.  If you are using this program for your own purposes,
please either use this option or change the default address at the top of
this script.

=item B<-e> I<pattern>, B<--exclude>=I<pattern>

Exclude actions for groups matching I<pattern> from the report, where
I<pattern> is a regular expression.

=item B<-h>, B<--help>

Print out this documentation (which is done simply by feeding the script to
C<perldoc -t>).

=item B<-g> I<newsgroup>, B<--newsgroup>=I<newsgroup>

Post the message to I<newsgroup> instead of the default of news.lists.misc.
This option only makes sense in combination with B<-p>.  If you are using
this program for your own purpose, you probably want to either use this
option or change the default address at the top of the script if you're
posting the results.

=item B<-n>, B<--dry-run>, B<--just-print>

Rather than mail or post the output, just print the mail message or post
that would have been sent to standard out instead.

=item B<-p>, B<--post>

Rather than mailing the message, post it instead.  Note that an Approved
header will be added pointing to the sender configured at the top of this
script, so be careful with moderated groups.

=item B<-r>, B<--raw>

Don't format the report and include the newsgroup descriptions.  Instead,
just print out the actions from the logs.

=item B<-t> I<template>, B<--template>=I<template>

Rather than using the simple mail or news message format defined in the
script, generate the mail or news message from I<template>.  This template
will be processed by Text::Template and should follow its syntax for
embedded code blocks and variables (basically by enclosing Perl expressions
in {} and referring to variables as normal in Perl code).

The following variables will be set:

    $start      The starting date of the report as YYYY-MM-DD.
    $end        The ending date of the report as YYYY-MM-DD.
    $address    The address to send the message, for mail reports.
    $newsgroup  The newsgroup to which to post the message, for posts.
    $report     The text of the report, affected by the --raw flag.

C<$address> and C<$newsgroup> are affected by the B<-a> and B<-g> flags.

=item B<-w>, B<--weekly>

If no or only one date is specified on the command line, use a week as the
default interval instead of a day.  In other words, if no dates are
specified, the report will be for the previous week.  If one date is
specified, the report will be for the week following that date.

=back

=head1 FILES

=over 4

=item F</srv/control/active.db>

The database of currently active newsgroups and their moderation status in
Berkeley DB hash format.

=item F</srv/control/logs>

The directory containing the log messages from control message processing.
These logs are parsed to generate the report.

=back

=head1 AUTHOR

Russ Allbery <eagle@eyrie.org>

=head1 SEE ALSO

process-control(1), update-control(1)

This script is part of the control-archive package.  The control-archive web
page at L<https://www.eyrie.org/~eagle/software/control-archive/> will have
the current version of the package.

=cut
