#!/usr/bin/perl

BEGIN {
  $abuild_base_dir = "/usr/share/inst-source-utils";
  unshift @INC, "$abuild_base_dir/modules";
}

use XML::Structured ':bytes';
use ABXML;
use RPMQ;
use Digest;
use Getopt::Std;
use Compress::Zlib;
use Encode;
use FileHandle;
use File::stat;
use Data::Dumper;
use Locale::gettext;
use POSIX;     # Needed for setlocale()
use URI::Escape;


my $checksum_type = "SHA-256";
my $checksum_type_md = "sha256";

# i18n is done via en locale only:
$ENV{'LC_ALL'} = 'en_US.UTF-8';
setlocale(LC_MESSAGES, "en_US.UTF-8");
setlocale(LC_ALL, "en_US.UTF-8");


sub OpenFileRead {
  my $filename = shift;
  my ($FH) = new FileHandle;
  open ($FH, "<", "$filename") || die "ERROR: can't read input file $filename";
  return $FH;
}

sub ReadFileToHash {
  my ($filename) = @_;
  my (%temp);
  my $FH = OpenFileRead( $filename );
  while (<$FH>) {
    chomp ($_);
    last if ( $_ =~ /^:END/ );
    next if ( $_ =~ /^\#/ );
    next if ( $_ =~ /^\s$/ );
    my ($le,$ri) = split (/:/, $_, 2 );
    $le =~ s/^\s*(.*)\s*$/$1/;
    $ri =~ s/^\s*(.*)\s*$/$1/;
    $ri =~ s/\\n/\n/g;
    $temp{$le}=$ri;
  }
  close ($FH);
  return \%temp;
}

sub ReadKeywordFile {
  my ($filename) = @_;
  my (%temp) = %{ReadFileToHash($filename)};
  for my $curpak (keys (%temp)) {
    $temp{$curpak} =~ s/^.*\+Kwd://s;
    $temp{$curpak} =~ s/\-Kwd:.*$//s;
    $temp{$curpak} =~ s/^\n//s;
    $temp{$curpak} =~ s/\n$//s;
    $temp{$curpak} = [split("/n",$temp{$curpak})];
  }
  return \%temp;
}

sub str2utf8 {
  my ($oct) = @_;
  return $oct unless defined $oct;
  return $oct unless $oct =~ /[^\011\012\015\040-\176]/s;
  eval {
    Encode::_utf8_on($oct);
    $oct = encode('UTF-8', $oct, Encode::FB_CROAK);
  };
  if ($@) {
    # assume iso-8859-1
    eval {
      Encode::_utf8_off($oct);
      $oct = encode('UTF-8', $oct, Encode::FB_CROAK);
    };
    if ($@) {
      Encode::_utf8_on($oct);
      $oct = encode('UTF-8', $oct, Encode::FB_XMLCREF);
    }
  }
  Encode::_utf8_off($oct);      # just in case...
  return $oct;
}

sub str2utf8xml {
  my ($oct) = @_;
  return $oct unless defined $oct;
  return $oct unless $oct =~ /[^\011\012\015\040-\176]/s;
  $oct = str2utf8($oct);
  Encode::_utf8_on($oct);
  # xml does not accept all utf8 chars, escape the illegal
  $oct =~ s/([\000-\010\013\014\016-\037\177])/sprintf("&#x%x;",ord($1))/sge;
  $oct =~ s/([\x{d800}-\x{dfff}\x{fffe}\x{ffff}])/sprintf("&#x%x;",ord($1))/sge;
  Encode::_utf8_off($oct);
  return $oct;
}


our $opt_k;
our $opt_e;
our $opt_d;
our $opt_u;
our $opt_h;
our $opt_p;
our $opt_g;

&getopts("k:e:d:upgh") || die "ERROR: No such option. -h for help\n";
&usage if ($opt_h);

sub usage(){
  print "\nUsage: $0 [-h][-u][-v VERSION][-k KWDFILE][-e EULADIR][-d REPOPATH]\n\n";
  print "         -u   use unique filenames\n";
  print "         -p   add diskusage data\n";
  print "         -g   add gpg key tags\n";
  print "         -k = path to keyword file\n";
  print "         -e = path to eula directory\n";
  print "         -d = path to repository\n";
  print "         -h   this text\n";
  exit;
}

sub filedata {
  my ($repo_dir, $filename, $datatype, $unique_filenames) = @_;
  my $result;
  $result->{'type'} = $datatype;
  $result->{'location'}->{'href'} = "repodata/$filename";
  print "INFO: reading $repo_dir/repodata/$filename\n";
  open (F, "<", "$repo_dir/repodata/$filename");
  my $ctx_a = Digest->new($checksum_type);
  $ctx_a->addfile(*F);
  my $checksum_a = $ctx_a->hexdigest();
  close (F);
  $result->{'checksum'} = { 'type' => $checksum_type_md, '_content' => $checksum_a };

  my $stat_a = stat("$repo_dir/repodata/$filename");
  $result->{'timestamp'} = $stat_a->mtime;
  $result->{'size'} = $stat_a->size;

  if ($filename =~ /\.(?:gz|zst)$/) {
    my $decompressor = "gzip";
    $decompressor = "zstd" if $filename =~ /\.zst$/;
    open (F, "-|", "$decompressor -dc $repo_dir/repodata/$filename");
    my $buffer_b = join("", <F>);
    close (F);
    $result->{'open-size'} = length($buffer_b);
    my $ctx_b = Digest->new($checksum_type);
    $ctx_b->add($buffer_b);
    my $checksum_b = $ctx_b->hexdigest();
    $result->{'open-checksum'} = { 'type' => $checksum_type_md, '_content' => $checksum_b };
  }

  if ($unique_filenames) {
    my $target_filename = $checksum_a."-".$filename;
    rename "$repo_dir/repodata/$filename","$repo_dir/repodata/$target_filename";
    $result->{'location'}->{'href'} = "repodata/$target_filename"
  }
  return $result;
}

sub calcdudata {
  my ($rpm, $maxdepth) = @_;
  my %q = RPMQ::rpmq_many($rpm, 1027, 1028, 1030, 1095, 1096, 1116, 1117, 1118);
  if (!$q{1027}) {
    $q{1027} = $q{1117} || [];
    my @di = @{$q{1116} || []};
    $_ = $q{1118}->[shift @di] . $_ for @{$q{1027}};
  }
  my @modes = @{$q{1030} || []};
  my @devs = @{$q{1095} || []};
  my @inos = @{$q{1096} || []};
  my @names = @{$q{1027} || []};
  my @sizes = @{$q{1028} || []};
  my %seen;
  my %dirnum;
  my %subdirnum;
  my %dirsize;
  my %subdirsize;
  my ($name, $first);
  for $name (@names) {
    my $mode = shift @modes;
    my $dev = shift @devs;
    my $ino = shift @inos;
    my $size = shift @sizes;
    # strip leading slash
    # prefix is either empty or ends in /
    $name = "usr/src/packages/$name" unless $name =~ s/^\///;

    # check if regular file
    next if ($mode & 0170000) != 0100000;
    # don't count hardlinks twice
    next if $seen{"$dev $ino"};
    $seen{"$dev $ino"} = 1;

    # rounded size in kbytes
    $size = int ($size / 1024) + 1;

    $name = '' unless $name =~ s/\/[^\/]*$//;
    if (($name =~ tr/\///) < $maxdepth) {
      $dirsize{"$name/"} += $size;
      $dirnum{"$name/"} += 1;
      $subdirsize{"$name/"} ||= 0;    # so we get all keys
    }
    # traverse though path stripping components from the back
    $name =~ s/\/[^\/]*$// while ($name =~ tr/\///) > $maxdepth;

    while ($name ne '') {
      $name = '' unless $name =~ s/\/[^\/]*$//;
      $subdirsize{"$name/"} += $size;
      $subdirnum{"$name/"} += 1;
    }
  }
  my @dulist;
  for $name (sort keys %subdirsize) {
    next unless $dirsize{$name} || $subdirsize{$name};
    $dirsize{$name} ||= 0;
    $subdirsize{$name} ||= 0;
    $dirnum{$name} ||= 0;
    $subdirnum{$name} ||= 0;
    # SUSETAGS: "$name $dirsize{$name} $subdirsize{$name} $dirnum{$name} $subdirnum{$name}";

    # workaround for libsolv parser bug, make sure dir starts with '/'
    my $xname = $name;
    $xname = "/$xname" unless $xname =~ /^\//;
    push @dulist, { 'name' => $xname, 'size' => $dirsize{$name} + $subdirsize{$name}, 'count' => $dirnum{$name} + $subdirnum{$name} };
  }
  return { 'dirs' => { 'dir' => \@dulist } };
}


my $eula_dir;
my $kwd_file;
my $repo_dir;
my $unique_filenames;
my $diskusage;
my $gpgkeys;

$eula_dir = $opt_e if $opt_e;
$kwd_file = $opt_k if $opt_k;
$repo_dir = $opt_d if $opt_d;
$unique_filenames = 1 if $opt_u;
$diskusage = 1 if $opt_p;
$gpgkeys = 1 if $opt_g;

die "no repository specified" unless $repo_dir;
print "no eulas or keywords, translations only\n" unless $eula_dir || $kwd_file;

die "repo dir $repo_dir is not a directory\n" unless -d $repo_dir;
die "repo dir $repo_dir has no repodata subdir \n" unless -d "$repo_dir/repodata";

my $keyword_data;
if ($kwd_file && -f $kwd_file) {
  print "INFO: reading keyword file\n";
  $keyword_data = ReadKeywordFile($kwd_file) if $kwd_file && -f $kwd_file;
}

my $eula_data;
if ($eula_dir && -d $eula_dir) {
  print "INFO: reading eula data\n";
  opendir(DIR,"$eula_dir") || die "ERROR: could not open eula directory\n";
  for (readdir(DIR)) {
    next if /^\./;
    my $pack = $_;
    my $packname = $pack;
    $packname =~ s/\.en$//;
    open(EULA, "<", "$eula_dir/$pack");
    my @eula = <EULA>;
    close (EULA);
    $eula_data->{$packname} = join("",@eula);
  }
  closedir(DIR);
}

print "INFO: reading repomd.xml\n";
open(REPOMD, "<", "$repo_dir/repodata/repomd.xml");
my $repomd_raw = join("",<REPOMD>);
close (REPOMD);
my $repomd = XMLin($ABXML::repomd, $repomd_raw);
my $prim_data = (grep { $_->{'type'} eq 'primary' } @{$repomd->{'data'}})[0];
my $prim_filename = $prim_data->{'location'}->{'href'};

print "INFO: reading primary $prim_filename\n";
my $prim_decompressor = "gzip";
$prim_decompressor = "zstd" if $prim_filename =~ /\.zst$/;
open (F, "-|", "$prim_decompressor -dc $repo_dir/$prim_filename");
my $prim_xml = join("", <F>);
close (F);
my $primary = XMLin($ABXML::primary,$prim_xml);
$prim_xml = "";

print "INFO: processing packages from primary\n";
my @suse;
for my $pack (@{$primary->{'package'}}) {
 my $data;
 my $pkgid = (grep {$_->{'pkgid'}} @{$pack->{'checksum'}})[0]->{'_content'};
 my $pkgpath = $pack->{'location'}->{'href'};
 $data->{'pkgid'} = $pkgid;
 for my $field (qw(name arch version)) {
   $data->{$field} = $pack->{$field};
 }
 if ($pack->{'format'}->{'rpm:provides'}->{'rpm:entry'}) {
   for $p (@{$pack->{'format'}->{'rpm:provides'}->{'rpm:entry'}}) {
       next unless $p->{'name'} eq 'pattern-category()';
       $data->{'pattern-category'} = uri_unescape($p->{'ver'});
       last; # we expect only one category
   }
 }
 if ($keyword_data->{$pack->{'name'}}) {
    push @{$data->{'keyword'}}, { '_content' => $_ } for @{$keyword_data->{$pack->{'name'}}};
 }
 $data->{'eula'} = str2utf8xml($eula_data->{$pack->{'name'}}) if $eula_data && $eula_data->{$pack->{'name'}};
 if ($pkgpath && $diskusage) {
    my $du = calcdudata("$repo_dir/$pkgpath", 3);
    $data->{'diskusage'} = $du if $du;
 }
 next unless $data->{'keyword'} || $data->{'eula'} || $data->{'diskusage'} || $data->{'pattern-category'};
 push @suse, $data;
}

print "INFO: cleanup\n";
my @new_repomd_data = grep { $_->{'type'} !~ /^susedata/ && $_->{'type'} ne 'appdata' && $_->{'type'} ne 'app-icons'} @{$repomd->{'data'}};

# i18n
# get language list, there should be a better way?
print "INFO: i18n\n";
opendir(D, "/usr/share/locale/en_US/LC_MESSAGES/");
my @languages;
while(readdir(D)) {
  if ($_ =~ "^package-translations-\(.*\).mo\$") {
    push @languages, $1;
  }
}
closedir D;

for my $language (sort(@languages)) {
  print "INFO: translate $language\n";
  next if $language eq 'en';
  textdomain("package-translations-$language");
  my @i18n;
  for my $pack (@{$primary->{'package'}}) {
    my $data;
    my $pkgid = (grep {$_->{'pkgid'}} @{$pack->{'checksum'}})[0]->{'_content'};
    my $summary     = $pack->{'summary'}[0]->{'_content'};
    my $description = $pack->{'description'}[0]->{'_content'};
    my $category;
    if ($pack->{'format'}->{'rpm:provides'}->{'rpm:entry'}) {
      for $p (@{$pack->{'format'}->{'rpm:provides'}->{'rpm:entry'}}) {
          next unless $p->{'name'} eq 'pattern-category()';
          $category = uri_unescape($p->{'ver'});
          last; # we expect only one category
      }
    }
    my $i18n_summary     = gettext($summary);
    my $i18n_description = gettext($description);
    my $i18n_category    = $category?gettext($category):undef;

    my $eula_key = $pack->{'name'}.".".$language;
    $data->{'eula'} = str2utf8xml($eula_data->{$eula_key}) if $eula_data && $eula_data->{$eula_key};
    $data->{'summary'} = { 'lang' => $language, '_content' => $i18n_summary } if $summary ne '' && $summary ne $i18n_summary;
    $data->{'description'} = { 'lang' => $language, '_content' => $i18n_description } if $description ne '' && $description ne $i18n_description;
    $data->{'pattern-category'} = { 'lang' => $language, '_content' => $i18n_category } if $category && $category ne $i18n_category;
    next unless $data;

    $data->{'pkgid'} = $pkgid;
    for my $field (qw(name arch version)) {
      $data->{$field} = $pack->{$field};
    }
    push @i18n, $data;
  }

  my $i18ndata = {
   'xmlns' => 'http://linux.duke.edu/metadata/susedata',
   'packages' => scalar(@i18n),
   'package' => \@i18n,
  };
  my $susedata_content = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
  $susedata_content .= XMLout($ABXML::susedata_i18n, $i18ndata);

  my $gz = gzopen("$repo_dir/repodata/susedata.$language.xml.gz", "wb");
  $gz->gzwrite($susedata_content);
  $gz->gzclose();

  print "INFO: add susedata.$language\n";
  push @new_repomd_data, filedata($repo_dir, "susedata.$language.xml.gz", "susedata.$language", $unique_filenames);
}

print "INFO: writing out susedata\n";
my $susedata = {
 'xmlns' => 'http://linux.duke.edu/metadata/susedata',
 'packages' => scalar(@suse),
 'package' => \@suse,
};

my $susedata_content = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
$susedata_content .= XMLout($ABXML::susedata, $susedata);

my $gz = gzopen("$repo_dir/repodata/susedata.xml.gz", "wb");
$gz->gzwrite($susedata_content);
$gz->gzclose();

print "INFO: writing out repomd\n";
push @new_repomd_data, filedata($repo_dir, "susedata.xml.gz", 'susedata', $unique_filenames);

push @new_repomd_data, filedata($repo_dir, "appdata.xml.gz", "appdata", $unique_filenames) if -f "$repo_dir/repodata/appdata.xml.gz";
push @new_repomd_data, filedata($repo_dir, "appdata-screenshots.tar", "appdata-screenshots", $unique_filenames) if -f "$repo_dir/repodata/appdata-screenshots.tar";
push @new_repomd_data, filedata($repo_dir, "appdata-icons.tar.gz", "appdata-icons", $unique_filenames) if -f "$repo_dir/repodata/appdata-icons.tar.gz";

$repomd->{'data'} = \@new_repomd_data;

if ($gpgkeys) {
  my @new_repomd_tags_content;
  @new_repomd_tags_content = grep { $_ !~ /^gpg-pubkey/ } @{$repomd->{'tags'}->{'content'}} if $repomd->{'tags'} && $repomd->{'tags'}->{'content'};
  print "INFO: adding gpg key tags\n";
  opendir(DIR,"$repo_dir") || die "ERROR: could not open repo base directory\n";
  for (readdir(DIR)) {
    next unless /^gpg-pubkey-/;
    my $filename = $_;
    my $fpr = "";
    open(GPG,"gpg --no-keyring --no-default-keyring --with-colons --import-options show-only --import --fingerprint < $repo_dir/$_ |") || die "could not call gpg";
    while(<GPG>) {
	    chomp();
	    next unless /^fpr:/;
	    my @gline = split(":",$_);
	    $fpr = "?fpr=$gline[9]";
	    last; # only the main sig, not subkey fingerprints
    }
    close (GPG);
    push @new_repomd_tags_content, "$filename$fpr";
  }
  closedir(DIR);
  $repomd->{'tags'}->{'content'} = \@new_repomd_tags_content if @new_repomd_tags_content;
}

open (REPOMD, ">", "$repo_dir/repodata/repomd.xml");
print REPOMD "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
print REPOMD XMLout($ABXML::repomd, $repomd);
close (REPOMD);

print "INFO: written out repomd\n";

