#!/usr/bin/perl

use List::MoreUtils qw(uniq sort_by);
use LaTeX::Encode qw(latex_encode);
use Data::Dumper;

# five files:
# - mlr: input file, unsorted list of messages, format "hash:listname"
#        generated by LaTeX, can contain duplicates
# - mlf: cache of message senders; format: one message per two lines
#        * first line "hash:listname"
#        * second line message sender
#        maintained by this script, can be deleted any time but needs
#        archives.gentoo.org (a.g.o) access for recreation
# - mls: cache of message subjects; format: one message per two lines
#        * first line "hash:listname"
#        * second line message subject
#        maintained by this script, can be deleted any time but needs a.g.o
#        access for recreation
# - mlt: cache of message dates; format: one message per two lines
#        * first line "hash:listname"
#        * second line message date
#        maintained by this script, can be deleted any time but needs a.g.o
#        access for recreation
# - mld: output file, LaTeX fragment that can be sourced


# Fetch sender, subject and date of one archived mailing list message.
#
# Arguments:
#   $list - mailing list name (the "listname" part of an mlr entry)
#   $hash - message hash (the "hash" part of an mlr entry)
#
# Returns the list ($from, $subject, $date) scraped from the message page
# https://archives.gentoo.org/$list/message/$hash, or an empty list (after
# printing a warning) if wget could not be started or the page does not
# contain the expected "From:", ">Subject:<" and ">Date:<" rows.
sub getdata {
  my ($list, $hash) = @_;

  my $url = "https://archives.gentoo.org/$list/message/$hash";

  # List-form pipe open: wget is executed directly, without a shell, so
  # unusual characters in $list or $hash are never treated as shell syntax.
  my $web;
  unless (open($web, '-|', 'wget', '-O', '-', $url)) {
    warn "Cannot run wget for $url: $!\n";
    return;
  }
  binmode($web, ':encoding(UTF-8)');

  # Skip forward to the first line matching $marker and return the line that
  # follows it (without newline). Returns undef when the end of the page is
  # reached first, instead of looping forever.
  my $value_after = sub {
    my ($marker) = @_;
    while (defined(my $line = <$web>)) {
      next unless $line =~ $marker;
      my $value = <$web>;
      chomp $value if defined $value;
      return $value;
    }
    return undef;
  };

  # The rows appear in this order on the page; each value is on the line
  # directly after its label.
  my $from    = $value_after->(qr/From:/);
  my $subject = $value_after->(qr/>Subject:</);
  my $date    = $value_after->(qr/>Date:</);

  # Closing the pipe also waits for wget to terminate.
  close $web;

  unless (defined $from && defined $subject && defined $date) {
    warn "Could not extract sender, subject and date from $url\n";
    return;
  }

  # Sender: strip the table markup, the " <address>" part and any quotes.
  $from =~ s/^.*<td>//;
  $from =~ s/ &lt;.*$//;
  $from =~ s/&quot;//g;

  # Subject: strip the table markup and the "[listname] " prefix. The list
  # name is quoted so that it is matched literally.
  $subject =~ s/^.*<td><strong>//;
  $subject =~ s/<\/strong><\/td>.*$//;
  $subject =~ s/\[\Q$list\E\] ?//;

  # Date: strip the table markup.
  $date =~ s/^.*<td>//;
  $date =~ s/<\/td>.*$//;

  return ($from, $subject, $date);
}


# Main code start

use strict;
use warnings;

# All message data is handled as decoded character strings; encode as UTF-8
# when reporting progress on the terminal.
binmode(STDOUT, ':encoding(UTF-8)');

# Read all lines of a UTF-8 text file and return them without trailing
# newlines. A file that cannot be opened yields an empty list, because the
# cache files are allowed to be missing.
sub read_lines {
  my ($file) = @_;

  open(my $fh, '<:encoding(UTF-8)', $file) or return;
  my @lines = <$fh>;
  close $fh;
  chomp @lines;

  return @lines;
}

# Read a cache file (two lines per message: "hash:listname", then the value)
# and return it as a flat key/value list suitable for hash assignment.
sub read_cache {
  my ($file) = @_;

  my @lines = read_lines($file);

  # A truncated file ends with a key that has no value line; drop that key so
  # the message is simply fetched again.
  pop @lines if @lines % 2;

  return @lines;
}

# Write a cache hash (given as hash reference) to a file, sorted by key, in
# the two-lines-per-message format. Dies if the file cannot be written.
sub write_cache {
  my ($file, $value_for) = @_;

  open(my $fh, '>:encoding(UTF-8)', $file)
    or die "Cannot open $file for writing: $!\n";
  for my $key (sort keys %$value_for) {
    my $value = defined $value_for->{$key} ? $value_for->{$key} : '';
    print $fh "$key\n$value\n";
  }
  close($fh) or die "Cannot close $file: $!\n";

  return;
}

# Write one \renewcommand block to the already opened output handle $fh.
# $macro is the LaTeX macro name including the backslash, $value_for is the
# cache hash reference to take the values from; the remaining arguments are
# the "hash:listname" entries to emit. Every value is LaTeX-encoded.
sub write_macro {
  my ($fh, $macro, $value_for, @entries) = @_;

  print $fh '\renewcommand{' . $macro . '}[1]{%' . "\n";
  for my $msg (@entries) {
    my ($msghash) = split /:/, $msg, 2;
    my $value = defined $value_for->{$msg}
              ? latex_encode($value_for->{$msg})
              : '';
    print $fh '\ifthenelse{\equal{#1}{' . $msghash . '}}{{' . $value . '}}{}%' . "\n";
  }
  print $fh '}' . "\n";

  return;
}

# Read the list of messages referenced in the LaTeX file. Lines that are not
# of the form "hash:listname" (e.g. blank lines) are ignored.
my @mlrlist = read_lines("decisions.mlr");
my @messages = uniq sort_by { $_ } grep { /^[^:]+:./ } @mlrlist;

# Read the caches of message senders, subjects and dates; these files can be
# deleted, but recreating or updating them requires internet access.
my %messagefrom    = read_cache("decisions.mlf");
my %messagesubject = read_cache("decisions.mls");
my %messagedate    = read_cache("decisions.mlt");

# Loop through the referenced messages, check if we already have the data,
# and if not fetch and add it.
for my $msg (@messages) {
  my ($msghash, $msglist) = split /:/, $msg, 2;

  print "List $msglist, hash $msghash\n";

  # Each cache file may be deleted on its own, so a message only counts as
  # cached when sender, subject and date are all present.
  my @missing = grep { !defined $_->{$msg} || $_->{$msg} eq '' }
                (\%messagefrom, \%messagesubject, \%messagedate);

  if (!@missing) {
    print "  Sender  is \"$messagefrom{$msg}\"\n";
    print "  Subject is \"$messagesubject{$msg}\"\n";
    print "  Date    is \"$messagedate{$msg}\"\n";
    next;
  }

  print "  Data not yet available, fetching it\n";
  my ($from, $subject, $date) = getdata($msglist, $msghash);

  # Do not cache anything for a failed fetch; it is retried on the next run.
  unless (defined $from && defined $subject && defined $date) {
    warn "  Could not fetch data for $msg\n";
    next;
  }

  $messagefrom{$msg}    = $from;
  $messagesubject{$msg} = $subject;
  $messagedate{$msg}    = $date;
  print "    Sender  is \"$messagefrom{$msg}\"\n";
  print "    Subject is \"$messagesubject{$msg}\"\n";
  print "    Date    is \"$messagedate{$msg}\"\n";
}

# Write out the sender, subject and date caches again.
write_cache("decisions.mlf", \%messagefrom);
write_cache("decisions.mls", \%messagesubject);
write_cache("decisions.mlt", \%messagedate);

# Write out the TeX input file. Sender, subject and date are all passed
# through latex_encode so that characters special to LaTeX cannot break the
# document.
open(my $mld, '>:encoding(UTF-8)', "decisions.mld")
  or die "Cannot open decisions.mld for writing: $!\n";

write_macro($mld, '\gentoomailfrom',    \%messagefrom,    @messages);
write_macro($mld, '\gentoomailsubject', \%messagesubject, @messages);
write_macro($mld, '\gentoomaildate',    \%messagedate,    @messages);

close($mld) or die "Cannot close decisions.mld: $!\n";
