#!/usr/bin/perl -w
#
# Generate config and doc files from hierarchy configs.
#
# This program converts machine-parsable hierarchy configuration fragments
# into a control.ctl file in the format that had been used for the ISC and INN
# versions of that file.  It also supports special overrides.
#
# Copyright 2002-2004, 2008, 2020 Russ Allbery <eagle@eyrie.org>
#
# SPDX-License-Identifier: MIT

##############################################################################
# Modules and declarations
##############################################################################

require 5.006;

use strict;

use POSIX qw(strftime);
use Text::Wrap qw(wrap);

# If this variable is set when printing a header for a control.ctl entry, we
# need to print a blank comment line before the header.
our $NEEDSPACE;

##############################################################################
# Parsing
##############################################################################

# Convert a boolean value to 0 or 1, dieing with an error if the value isn't
# one of the valid boolean values.  Takes the value, the file, and the key
# (the latter two for an error message).
sub boolean {
    my ($value, $file, $key) = @_;
    $value = lc $value;
    if ($value eq 'yes' || $value eq 'on' || $value eq 'true') {
        $value = 1;
    } elsif ($value eq 'no' || $value eq 'off' || $value eq 'false') {
        $value = 0;
    } else {
        die "Invalid value for $key in $file: $value\n";
    }
    return $value;
}

# Parse a hierarchy configuration fragment.  Takes a filename and returns a
# reference to a hash containing all of the elements of the configuration.
sub parse_config {
    my ($file) = @_;

    # Set defaults.
    my %config = ('non-pgp-drop' => 'yes');

    # Parse configuration file.
    open (CONFIG, $file) or die "Cannot open $file: $!\n";
    local $_;
    while (<CONFIG>) {
        chomp;
        warn "$file:$.: unparsable line: $_\n"
            unless /^([^:\s]+): (\S.*)/;
        my ($key, $value) = ($1, $2);
        while ($_ && $value =~ /^\[/ && $value !~ /\]/) {
            $_ = <CONFIG>;
            if (defined $_) {
                chomp;
                $value .= ' ' . $_;
            }
        }
        if ($value =~ /^\[ .* \]\s*$/) {
            my @values;
            $value =~ s/^\[\s*//;
            $value =~ s/\s*\]\s*$/ /;
            while ($value =~ s/^(\"[^\"]+\"|\S+)\s+//) {
                my $element = $1;
                $element =~ s/^\"//;
                $element =~ s/\"\s*$//;
                push (@values, $element);
            }
            $config{$key} = [ @values ];
        } else {
            $value =~ s/^\"//;
            $value =~ s/\"\s*$//;
            $config{$key} = $value;
        }
    }

    # Check boolean options.
    for my $option (qw/pgp non-pgp-drop/) {
        if ($config{$option}) {
            $config{$option} = boolean ($config{$option}, $file, $option);
        }
    }
    return \%config;
}

# Check the key fingerprint of a config entry, assuming that a key ring with
# all of the keys on it is in the keyring subdirectory of the current
# directory.
sub check_fingerprint {
    my ($config) = @_;
    return unless $$config{'key-fingerprint'};
    my $options = '--no-secmem-warning --homedir=keyring --fingerprint';
    $options .= ' --no-permission-warning';
    my $print = `gpg1 $options $$config{'key-id'}`;
    if ($? != 0) {
        warn "Failed to find fingerprint for $$config{'key-id'}\n";
        return;
    }
    $print =~ s/.*Key fingerprint = ([A-F0-9 ]+).*/$1/s;
    unless ($print eq $$config{'key-fingerprint'}) {
        warn "Fingerprint doesn't match for $$config{'key-id'}\n";
        warn "  WAS: $$config{'key-fingerprint'}\n";
        warn "  NOW: $print\n";
    }
}

##############################################################################
# Generation
##############################################################################

# Print a blank comment line if needed.
sub print_space {
    my ($fh) = @_;
    if ($NEEDSPACE) {
        print $fh "#\n";
        $NEEDSPACE = 0;
    }
}

# Print a metadata comment for a hierarchy.
sub print_header {
    my ($fh, $header, $value) = @_;
    print_space ($fh);
    print $fh "# ${header}: $value\n";
}

# Print a comment to control.ctl properly wrapped with the leading comment
# character.  Takes the file descriptor and the comment string.
sub print_comment {
    my ($fh, $comment) = @_;
    print_space ($fh);
    $Text::Wrap::columns = 74;
    $Text::Wrap::unexpand = 0;
    $comment .= "\n" unless $comment =~ /\n\z/;
    $comment = wrap ('# ', '# ', $comment);
    print $fh $comment;
    $NEEDSPACE = 1 if (($comment =~ tr/\n/\n/) > 1);
}

# Given an open file handle and a file, cat the contents of that file into the
# open file handle.  Used to handle special control message configurations and
# the initial header.
sub cat_file {
    my ($fh, $file) = @_;
    my $date = strftime ('%Y-%m-%d', localtime);
    open (FILE, $file) or die "Cannot open $file: $!\n";
    local $_;
    while (<FILE>) {
        s/\@DATE\@/$date/g;
        print $fh $_;
    }
    close FILE;
}

# Generate an entry for a control.ctl file from a hash of configuration
# information.  Takes the file handle to write it to and the configuration
# hash.
sub generate_config {
    my ($fh, $config) = @_;

    # Generate the leading comments.
    print $fh "## $$config{hierarchy} (";
    if    ($$config{type} eq 'local')    { print $fh '*LOCAL* -- '    }
    elsif ($$config{type} eq 'private')  { print $fh '*PRIVATE* -- '  }
    elsif ($$config{type} eq 'defunct')  { print $fh '*DEFUNCT* -- '  }
    elsif ($$config{type} eq 'historic') { print $fh '*HISTORIC* -- ' }
    elsif ($$config{type} eq 'reserved') { print $fh '*RESERVED* -- ' }
    print $fh $$config{description}, ")\n";
    if ($$config{comment}) {
        $NEEDSPACE = 1;
        my $comment = '';
        for my $line (@{ $$config{comment} }) {
            if ($line eq '' && length ($comment) > 0) {
                print_comment ($fh, $comment);
                $comment = '';
                $NEEDSPACE = 1;
            }
            next if $line eq '';
            $comment .= ' ' if length ($comment) > 0;
            $comment .= $line;
        }
        print_comment ($fh, $comment) if length ($comment) > 0;
    }

    # Print out a special comment for unusual hierarchies or public
    # hierarchies with no control message sender.
    if ($$config{type} eq 'historic') {
        $NEEDSPACE = 1;
        my $comment = "This hierarchy is not entirely defunct, but it"
            . " receives very little traffic and is included primarily for"
            . " the sake of completeness.";
        print_comment ($fh, $comment);
    } elsif ($$config{type} eq 'public' && !$$config{sender}
             && !$$config{'newgroup-sender'} && !$$config{'rmgroup-sender'}) {
        $NEEDSPACE = 1;
        my $comment = "This hierarchy is still in use, but it has no"
            . " active maintainer.  Control messages for this hierarchy"
            . " should not be honored without confirming that the sender"
            . " is the new hierarchy maintainer.";
        print_comment ($fh, $comment);
    }

    # Generate the metadata comments.
    for (@{ $$config{contact} || [] }) {
        print_header ($fh, 'Contact', $_);
    }
    print_header ($fh, 'URL', $$config{url}) if $$config{url};
    print_header ($fh, 'Admin group', $$config{'admin-group'})
        if $$config{'admin-group'};
    print_header ($fh, 'Key URL', $$config{'key-url'}) if $$config{'key-url'};
    print_header ($fh, 'Key fingerprint', $$config{'key-fingerprint'})
        if $$config{'key-fingerprint'};
    print_header ($fh, 'Syncable server', $$config{'sync-server'})
        if $$config{'sync-server'};
    my $comment;
    if ($$config{type} eq 'local' || $$config{type} eq 'private') {
        $comment = "For $$config{type} use only";
        if ($$config{contact}) {
            $comment .= ", contact the above address for information.";
        } else {
            $comment .= ".\n";
        }
    } elsif ($$config{type} eq 'defunct') {
        $comment = "This hierarchy is defunct.  Please remove it.";
    }
    print_comment ($fh, $comment) if $comment;

    # Generate the actual rules.
    my $action = 'doit';
    if ($$config{pgp}) {
        print_comment ($fh, '*PGP*   See comment at top of file.');
        if ($$config{'non-pgp-drop'}
            && (!$$config{'newgroup-sender'}
                || $$config{'newgroup-sender'}[0] ne '*')) {
            print_space ($fh);
            print $fh "newgroup:*:$$config{groups}:drop\n";
            print $fh "rmgroup:*:$$config{groups}:drop\n";
        }
        $action = "verify-$$config{'key-id'}";
    }
    if ($$config{type} eq 'public' || $$config{type} eq 'historic') {
        for my $type (qw/checkgroups newgroup rmgroup/) {
            my $sender = $$config{"$type-sender"} || $$config{sender};
            next unless $sender;
            for (@$sender) {
                print_space ($fh);
                print $fh "$type:$_:$$config{groups}:";
                print $fh (/^\*(\@\*\.[^.]+)?$/) ? "doit\n" : "$action\n";
            }
        }
    } elsif ($$config{type} eq 'reserved') {
        print_space ($fh);
        print $fh "checkgroups:*:$$config{groups}:drop\n";
        print $fh "newgroup:*:$$config{groups}:drop\n";
        print $fh "rmgroup:*:$$config{groups}:drop\n";
    } else {
        my $sender = $$config{'newgroup-sender'} || $$config{sender} || ['*'];
        for (@$sender) {
            print_space ($fh);
            print $fh "newgroup:$_:$$config{groups}:mail\n";
        }
        $sender = $$config{'rmgroup-sender'} || $$config{sender} || ['*'];
        for (@$sender) {
            print_space ($fh);
            print $fh "rmgroup:$_:$$config{groups}:$action\n";
        }
    }
    $NEEDSPACE = 0;
}

# Generate a control.ctl file in the current directory by combining the
# forms/control.ctl.pre initial header and fragments generated from the
# contents of the config subdirectory.
sub generate_ctl {
    open (CTL, "> control.ctl") or die "Cannot create control.ctl: $!\n";
    cat_file (\*CTL, 'forms/control.ctl.pre');
    opendir (D, 'config') or die "Cannot open config directory: $!\n";
    my @regular = grep { !/^\.|^CVS/ && $_ ne 'special' } readdir D;
    closedir D;
    opendir (D, 'config/special')
        or die "Cannot open config/special directory: $!\n";
    my @special = grep { !/^\.|^CVS/ } readdir D;
    closedir D;
    my %unique;
    my @hierarchies = grep { !$unique{$_}++ } sort (@regular, @special);
    for (@hierarchies) {
        print CTL "\n";
        if (-f "config/special/$_") {
            cat_file (\*CTL, "config/special/$_");
        } else {
            my $config = parse_config ("config/$_");
            generate_config (\*CTL, $config);
        }
    }
    close CTL;
}

# Generate a listing in the PGPKEYS file, given the file handle to print it
# to, the config hash, and the file name of the key file.
sub generate_keys_entry {
    my ($fh, $config, $key) = @_;
    print $fh "  $$config{hierarchy}\n\n";
    my @sender;
    if ($$config{sender}) {
        @sender = @{ $$config{sender} };
    } elsif ($$config{'rmgroup-sender'}) {
        @sender = @{ $$config{'rmgroup-sender'} };
    }
    for (@sender) {
        print $fh "       Control message sender: $_\n";
    }
    print $fh "       Key User ID: $$config{'key-id'}\n";
    print $fh "       Administrative group: $$config{'admin-group'}\n"
        if $$config{'admin-group'};
    if ($$config{'key-url'}) {
        print $fh "       Check also:\n";
        print $fh "          + $$config{'key-url'}\n";
    }
    print $fh "\n";
    cat_file ($fh, "keys/$key");
}

# Generate the PGPKEYS file in the current directory by combining the
# forms/PGPKEYS.pre header and entries generated from each config fragment
# corresponding to a hierarchy for which we have a key.
sub generate_keys {
    open (KEYS, "> PGPKEYS") or die "Cannot create PGPKEYS: $!\n";
    cat_file (\*KEYS, 'forms/PGPKEYS.pre');
    opendir (D, 'keys') or die "Cannot open keys directory: $!\n";
    my @keys = sort grep { !/^\.|^CVS/ } readdir D;
    closedir D;
    for (@keys) {
        print KEYS "\n         ", '_' x 61, "\n\n";
        my $config = parse_config ("config/$_");
        check_fingerprint ($config);
        generate_keys_entry (\*KEYS, $config, $_);
    }
    close KEYS;
}

# Generate an entry for the HTML documentation for PGP signing, given the file
# handle to print it to, the config hash, and the file name of the key file.
sub generate_html_entry {
    my ($fh, $config, $key) = @_;
    print $fh qq(<A NAME="$key"><HR></A>\n);
    my $hierarchy = $$config{hierarchy};
    $hierarchy =~ s/&/&amp;/g;
    print $fh "<H3>$hierarchy</H3>\n\n";
    my @sender;
    if ($$config{sender}) {
        @sender = @{ $$config{sender} };
    } elsif ($$config{'rmgroup-sender'}) {
        @sender = @{ $$config{'rmgroup-sender'} };
    }
    for (@sender) {
        s/&/&amp;/g;
        s/</&lt;/g;
        s/>/&gt;/g;
        print $fh "Control message sender: $_<BR>\n";
    }
    print $fh "Key User ID: $$config{'key-id'}<BR>\n";
    if ($$config{'admin-group'}) {
        my $group = $$config{'admin-group'};
        my $url = qq(<A HREF="news:$group">$group</A>);
        print $fh "Administrative group: $url<BR>\n";
    }
    if ($$config{'key-url'}) {
        my $url = $$config{'key-url'};
        $url =~ s%(\S+:\S+)%<A HREF=\"$1\">$1</A>%;
        print $fh "Check also:\n";
        print $fh "<UL>\n<LI>$url\n</UL>\n";
    }
    print $fh "\n<PRE>\n";
    cat_file ($fh, "keys/$key");
    print $fh "</PRE>\n";
}

# Generate the README.html file in the current directory by combining the
# forms/README.html.pre header, entries generated from each config fragment
# corresponding to a hierarchy for which we have a key, and the
# forms/README.html.post footer.
sub generate_html {
    open (KEYS, "> README.html") or die "Cannot create README.html: $!\n";
    cat_file (\*KEYS, 'forms/README.html.pre');
    opendir (D, 'keys') or die "Cannot open keys directory: $!\n";
    my @keys = sort grep { !/^\.|^CVS/ } readdir D;
    closedir D;
    for (@keys) {
        print KEYS "\n";
        my $config = parse_config ("config/$_");
        generate_html_entry (\*KEYS, $config, $_);
    }
    cat_file (\*KEYS, 'forms/README.html.post');
    close KEYS;
}

##############################################################################
# Main routine
##############################################################################

# Generate the various files.
generate_ctl;
generate_keys;
generate_html;

##############################################################################
# Documentation
##############################################################################

=head1 NAME

generate-files - Generate config and doc files from hierarchy configs

=head1 SYNOPSIS

B<generate-files>

=head1 DESCRIPTION

B<generate-files> reads hierarchy configuration fragments and PGP keys and
generates F<control.ctl>, F<PGPKEYS>, and F<README.html> (which is an HTML
version of the F<PGPKEYS> file and contains the same information) in the
current directory.  It is meant to be run from the root directory of a
configuration tree for Netnews control messages.

=head1 CONFIG FILE FORMAT

The main source of input to this script is a directory of configuration
fragments in F<config/>.  The format of those fragments is intended to be
compatible with the new configuration syntax supported by INN (but not
generally used within INN, at least yet).  If you have an INN source tree
available, see F<doc/config-*> in the INN source for more details.

Briefly, the format consists of key/value pairs in one of the following
two formats:

    <key>: <value>
    <key>: [ <value> <value> ... ]

In other words, keys take a single value or multiple values enclosed in
square brackets and separated by whitespace.  The second form is called a
vector.  Values that contain colons, whitespace, or double quotes must be
enclosed in double quotes.  Values may not contain double quotes and
escaping isn't implemented inside values.  Boolean values can be set with
C<yes>, C<on>, or C<true> and C<no>, C<off>, or C<false>.

The following keys are recognized:

=over 4

=item hierarchy

The abbreviated hierarchy name, used for the heading of the corresponding
F<control.ctl> entry.  This should conventionally be the group prefix of
the hierarchy without a leading period and in all caps.  If this
configuration file governs multiple hierarchies, each hierarchy should be
listed separated by S<C< & >>; for example, C<FOO & BAR & BAZ> for a
configuration file for C<foo.*>, C<bar.*>, and C<baz.*>.

=item type

The type of the hierarchy.  This should be one of C<public>, C<private>,
C<local>, C<defunct>, C<historic>, or C<reserved>.  Public and historic
hierarchies will generate F<control.ctl> entries that honor the control
messages.  Reserved hierarchies will generate entries that drop all
control messages for that hierarchy.  All others will generate
F<control.ctl> entries with an action of C<mail> for newgroup messages and
an action of C<doit> for rmgroup messages.

=item description

A short English description of the hierarchy.

=item groups

The F<control.ctl> pattern for the affected hierarchies.  This is one or
more wildmat patterns separated by C<|>.

=item sender

A vector of wildmat patterns matching control message senders.  Even if
there is only one control message sender for this hierarchy, it must be
given in the form of a vector (enclosed in square brackets).  This by
default sets the sender pattern for newgroups, rmgroups, and checkgroups,
but can be overridden by the following keys.  It is also used for the
control message sender in the F<PGPKEYS> file.

=item newgroup-sender

Overrides I<sender> and specifies the wildmat patterns matching newgroup
senders.  The value must be a vector.

=item rmgroup-sender

Overrides I<sender> and specifies the wildmat patterns matching rmgroup
senders.  The value must be a vector.

=item checkgroups-sender

Overrides I<sender> and specifies the wildmat patterns matching
checkgroups senders.  The value must be a vector.

=item contact

A vector of contact addresses.  These will be included in Contact comments
in the F<control.ctl> entry.

=item url

A URL for general information about the hierarchy.  Since URLs contain
colons, this must be in double-quotes.  This will be included in a URL
comment in the F<control.ctl> entry.

=item pgp

A boolean value indicating whether this hierarchy uses PGP.  The default
is C<no>.

=item non-pgp-drop

By default, if I<pgp> is set to C<yes>, a rule is added to drop all
non-signed control messages for this hierarchy.  Set this key to C<no> to
disable that behavior.

=item key-id

The user ID of the PGP key.  The control message verification process for
Netnews control messages matches the key ID (only the e-mail address
portion in newer software, but the entire key ID in older software).  In
retrospect, this was a poor design choice and hex key IDs should have been
used instead, but there's a ton of software out there written with that
assumption.  The value of this key should be that key ID, and must not
contain whitespace.  Conventionally, either the e-mail address of the
control message sender or the name of the administrative newsgroup for the
hierarchy is used as the key ID.

=item key-url

A URL from which one can retrieve the PGP key for the hierarchy from some
authoritative source (in other words, not a keyserver).  This value is put
into a Key URL comment in F<control.ctl> and into the F<PGPKEYS> file.
Since URLs contain colons, this value must be enclosed in double quotes.

=item key-fingerprint

The fingerprint of the PGP key.  If this value is set, B<generate-files>
will verify during processing that the PGP key it finds for this hierarchy
in the local keyring matches this fingerprint.  If set, this value is also
included in a Key Fingerprint comment in F<control.ctl>.

=item admin-group

The administrative group for the hierarchy (conventionally the group to
which announcements of new newsgroups and possibly newsgroup lists are
posted).  This value is included in a comment in F<control.ctl> and
F<PGPKEYS> if set.

=item sync-server

A publicly accessible NNTP server that supports the LIST ACTIVE command to
retrieve a list of current newsgroups in this hierarchy, suitable for
pointing B<actsync> at.

=back

Unknown keys are silently ignored.

=head1 FILES

=over 4

=item F<config/*>

The configuration fragments to be used for generating the output files.
They must follow the format described above.

=item F<config/special/*>

Fragments of a F<control.ctl> file that are included verbatim.  If a file
exists in this directory with the same name as one in the F<config/>
directory, the file in this directory is used in preference.  Files in
this directory cannot provide input to the F<PGPKEYS> file, so hierarchies
that require special F<control.ctl> entries but have PGP information
should have a conventional configuration file with the PGP information and
then a special fragment.

=item F<forms/PGPKEYS.pre>

The initial portion of the F<PGPKEYS> file.  Any instances of C<@DATE@>
will be replaced by the current date in YYYY-MM-DD format.

=item F<forms/README.html.pre>

The initial portion of the F<README.html> file.  Any instances of
C<@DATE@> will be replaced by the current date in YYYY-MM-DD format.

=item F<forms/README.html.post>

The final portion of the F<README.html> file.  Any instances of C<@DATE@>
will be replaced by the current date in YYYY-MM-DD format.

=item F<forms/control.ctl.pre>

The initial portion of the F<control.ctl> file.  Any instances of
C<@DATE@> will be replaced by the current date in YYYY-MM-DD format.

=item F<keys/*>

A directory containing PGP keys.  Each file in this directory corresponds
to the hierarchy whose configuration file in F<config/> has the same name.
The contents of the F<keys/*> file is included verbatim in F<PGPKEYS>.

=item F<keyring/>

For each hierarchy configuration containing a I<key-fingerprint>
parameter, B<generate-files> runs B<gpg1> with its home directory pointing
to this directory, asks for the fingerprint of that PGP key, and verifies
that it matches I<key-fingerprint> exactly (including whitespace, so that
value must match GnuPG's output format).  To generate this keyring in
advance of running B<generate-files>, run:

    mkdir keyring
    chmod 700 keyring
    gpg1 --homedir=keyring --import --allow-non-selfsigned-uid keys/*

The B<--allow-non-selfsigned-uid> parameter is only required if you want
to support PGP keys that aren't self-signed.  This is not secure, but some
hierarchy keys are not self-signed.

=back

=head1 AUTHOR

Russ Allbery <eagle@eyrie.org>

=head1 SEE ALSO

actsync(8), control.ctl(5), gpg1(1), uwildmat(3)

This script is part of the control-archive package.  The control-archive web
page at L<https://www.eyrie.org/~eagle/software/control-archive/> will have
the current version of the package.

=cut
