#!/usr/bin/perl

#   Copyright (c) MediaTek USA Inc., 2020
#
#   This program is free software;  you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or (at
#   your option) any later version.
#
#   This program is distributed in the hope that it will be useful, but
#   WITHOUT ANY WARRANTY;  without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program;  if not, see
#   <http://www.gnu.org/licenses/>.
#
#
# p4udiff
#
#   This script extracts a unified-diff between two Perforce changelists.
#   See
#      $ p4udiff --help
#

use strict;
use DateTime;
use Getopt::Long;

# Diagnostic switch: when true, the file-list pruning code further down prints
# the paths it removes or skips.  No command-line option visible in this file
# sets it, so it has to be changed here to enable that output.
my $verbose = 0;

package P4File;

# One line of 'p4 files' / 'p4 opened' output, stored as a blessed array.
# The constants name the array slots.
use constant {
              PATH       => 0,    # path exactly as printed (text before '#')
              NAME       => 1,    # PATH with the depot prefix mapped to the sandbox
              REVISION   => 2,
              ACTION     => 3,
              CHANGELIST => 4,
              TYPE       => 5,
};

# P4File->new($description)
#   Parse one line of the form
#       <path>#<rev> - <action> change <changelist> (<type>)
#   NAME is derived from PATH by replacing the first occurrence of
#   $main::depot_path with $main::sandbox_path.
#   Returns the new object, or undef when the line does not have that form.
#
#   NOTE(review): 'p4 opened' appears to print "<action> default change
#   (<type>)" for files in the default changelist, which this pattern would
#   not match - confirm against real output.
sub new
{
    my ($class, $description) = @_;

    my @fields = $description =~
        m/([^#]+)#([0-9]+)\s+-\s+(\S+)\s+change\s+([0-9]+)\s+\(([^)]+)\).*$/;

    # Deliberately 'return undef' rather than a bare 'return': callers write
    # append(P4File->new($line), 1), and an empty list there would shift the
    # following argument into the 'entry' position.
    return undef unless @fields;

    my ($path, $rev, $action, $changelist, $type) = @fields;

    my $name = $path;
    # The depot prefix is a literal path, not a pattern: quote it so that
    # characters such as '+', '.' or '(' in a directory name match themselves.
    # An empty prefix is skipped because an empty pattern would make Perl
    # reuse the last successful regexp instead.
    if (defined($main::depot_path) && length($main::depot_path)) {
        $name =~ s/\Q$main::depot_path\E/$main::sandbox_path/;
    }

    return bless [$path, $name, $rev, $action, $changelist, $type], $class;
}

# Path as reported by p4.
sub path
{
    my $self = shift;
    return $self->[PATH];
}

# Path with the depot prefix replaced by the sandbox prefix.
sub name
{
    my $self = shift;
    return $self->[NAME];
}

# Revision number (the digits after '#').
sub rev
{
    my $self = shift;
    return $self->[REVISION];
}

# Action word from the description line (e.g. the code elsewhere in this file
# tests it against 'delete').
sub action
{
    my $self = shift;
    return $self->[ACTION];
}

# Changelist number from the description line.
sub changelist
{
    my $self = shift;
    return $self->[CHANGELIST];
}

# File type: the text between the parentheses.
sub type
{
    my $self = shift;
    return $self->[TYPE];
}

package P4FileList;

# A set of P4File-like entries keyed by their p4 path, filtered by the
# include/exclude regexps.  Stored as a blessed array; the constants name
# the slots.
use constant {
              HASH    => 0,    # path => entry
              INCLUDE => 1,    # ref to the list of include regexps
              EXCLUDE => 2,    # ref to the list of exclude regexps
};

# P4FileList->new($p4tag)
#   Run 'p4 files $p4tag' and collect every output line that P4File can
#   parse (subject to the filtering done by append()).
#   The include/exclude lists are taken by reference from
#   @main::include_patterns / @main::exclude_patterns.
#   Dies if the command cannot be started, is killed by a signal, or exits
#   with a non-zero status.
sub new
{
    my ($class, $p4tag) = @_;
    my $cmd = "p4 files $p4tag";    # for messages only

    # List-form open: the path is handed to p4 as a single argument, without
    # a shell in between, so spaces or shell metacharacters are harmless.
    open(my $handle, "-|", "p4", "files", $p4tag) or
        die("unable to execute '$cmd': $!");

    my $self = [{}, \@main::include_patterns, \@main::exclude_patterns];
    bless $self, $class;

    while (my $line = <$handle>) {
        chomp $line;
        $line =~ s/\015$//;    # drop a trailing CR left by CRLF line ends

        $self->append(P4File->new($line));
    }

    # close() on a pipe returns false both for I/O errors ($! set) and for a
    # non-zero child status ($! is 0, details are in $?).
    if (!close($handle)) {
        die("unable to close '$cmd' pipe: $!\n") if $!;
        die("$cmd died from signal ", ($? & 0x7F), "\n") if ($? & 0x7F);
        die("$cmd exited with error ", ($? >> 8), "\n");
    }
    return $self;
}

# $list->include_me($path)
#   True if $path passes the filters: it must match no exclude regexp, and -
#   when include regexps exist - at least one of them.  'exclude' wins.
sub include_me
{
    my ($self, $path) = @_;

    foreach my $pat (@{$self->[EXCLUDE]}) {
        return 0
            if $path =~ /$pat/;
    }
    foreach my $pat (@{$self->[INCLUDE]}) {
        return 1
            if ($path =~ /$pat/);
    }
    # true if no include patterns specified (everything included),
    # false if there were no 'include' matches
    return scalar(@{$self->[INCLUDE]}) == 0;
}

# $list->append($entry [, $overwrite])
#   Store $entry under its path().  Nothing is stored when $entry is undef,
#   when its type() is exactly 'binary', or when the path is filtered out by
#   include_me().  An existing entry is kept (with a warning) unless
#   $overwrite is defined.  Returns $self.
sub append
{
    my ($self, $entry, $overwrite) = @_;

    return $self unless (defined($entry) &&
                         $entry->type() ne 'binary');
    my $hash = $self->[HASH];
    my $key  = $entry->path();
    if (!defined($overwrite) && defined($hash->{$key})) {
        warn("WARNING: skipping duplicated path $key\n");
        return $self;
    }
    $hash->{$key} = $entry
        if $self->include_me($key);
    return $self;
}

# Sorted list of the stored paths.
sub files
{
    my $self = shift;
    return sort keys %{$self->[HASH]};
}

# Entry stored under $key, or undef if there is none.
sub get
{
    my ($self, $key) = @_;

    return $self->[HASH]->{$key};
}

# Forget the entry stored under $key (no-op if absent).  Returns $self.
sub remove
{
    my ($self, $key) = @_;

    delete $self->[HASH]->{$key};
    return $self;
}

package main;

# Regexps collected from --exclude / --include.  These are package globals
# (not lexicals) because P4FileList->new reaches them as
# @main::exclude_patterns and @main::include_patterns.
our @exclude_patterns;
our @include_patterns;
my $suppress_unchanged;    # --no-unchanged
# -b|--blank.  NOTE(review): parsed here, but nothing in the visible part of
# this file reads it afterwards - confirm whether it should reach 'p4 diff'.
my $ignore_whitespace;
my $help;

my $options_ok = GetOptions("exclude=s"    => \@exclude_patterns,
                            "include=s"    => \@include_patterns,
                            'b|blank'      => \$ignore_whitespace,
                            'no-unchanged' => \$suppress_unchanged,
                            'help'         => \$help);

# Bad options, an explicit --help, and a wrong number of positional arguments
# all take the same path: usage text on STDERR, exit status 1.
if (!$options_ok ||
    $help ||
    3 != scalar(@ARGV)
) {
    print(STDERR <<EOF);
Usage:
  $0 \\
    [(--exclude|--include) regexp[,regexp]] [-b] [--no-unchanged] \\
    sandbox_directory base_changelist current_changelist

 sandbox_directory:
          the top level directory in your perforce
          checkout area.
 base_changelist:
          changelist number or label corresponding to your 'baseline'
          coverage data.
 current_changelist:
          may be a changelist number, label, or the literal string "sandbox".
          "sandbox" indicates that there may be changes in 'sandbox_directory'
          which have not been committed to perforce yet.

 -b|--blank:
          ignore whitespace changes.
 --no-unchanged:
          In order to eliminate potential ambiguities caused by multiple
          source files with the same basename, by default, we include
          unchanged file references in the diff report.
          This option removes those entries.
 --include regexp:
          regexp compared to the Perforce path (i.e., starting in
          //p4_repo/...).  If 'include' regexps are specified, then only
          matching references are included in the report; if no
          'include' regexps are specified, then entries are included unless
          they have been excluded.
 --exclude regexp:
          regexp compared to the Perforce path (i.e., starting in
          //p4_repo/...).  Matching entries are removed from the report.
         'exclude' wins if both exclude and include would match.
EOF
    exit(1);
}

# Each option may be repeated and may itself carry a comma-separated list;
# flatten both forms into one list.  (A regexp therefore cannot contain a
# comma.)
@exclude_patterns = split(',', join(',', @exclude_patterns));
@include_patterns = split(',', join(',', @include_patterns));

my $top_directory   = shift @ARGV;
my $base_changelist = shift @ARGV;
my $curr_changelist = shift @ARGV;

# 'p4 files' needs "/..." appended to the name when it is a directory.
#  - If the name exists locally, the file system tells us whether it is a
#    directory.
#  - Otherwise (e.g. a depot path, which need not match the client workspace
#    path) ask the Perforce server.
# Don't go to P4 unless necessary - the interaction is pretty slow.
my $p4_path = $top_directory;
if (-e $p4_path) {
    $p4_path .= "/..."
        if (-d $p4_path);
} else {
    # 'p4 fstat' output containing 'depotFile' means the name is a file;
    # anything else (including a failure to run p4) is treated as a directory.
    # The command is run without a shell so the path is passed verbatim.
    my $is_depot_file = 0;
    if (open(my $fstat, "-|", "p4", "fstat", $top_directory)) {
        while (my $line = <$fstat>) {
            $is_depot_file = 1
                if $line =~ /depotFile/;
        }
        close($fstat);    # exit status is irrelevant here; only the output counts
    }
    $p4_path .= "/..."
        unless $is_depot_file;
}

#my $workspace = `p4 -F \%clientRoot\% -ztag info`;
# 'p4 where' prints the depot, client and local spelling of the path;
# it needs the "..." in the path or p4 gets confused.
my $where = '';
if (open(my $where_pipe, "-|", "p4", "where", $p4_path)) {
    local $/;    # slurp the whole output
    my $output = <$where_pipe>;
    $where = $output
        if defined($output);
    close($where_pipe);
}
$where =~ s/\/\.\.\.//g;
# Only the first three whitespace-separated fields are used, so paths that
# contain blanks are not handled.
our ($depot_path, $workspace_path, $sandbox_path) = split(' ', $where);
warn("WARNING: 'p4 where $p4_path' returned no depot/client/local mapping; " .
     "depot paths will be reported as-is\n")
    unless defined($sandbox_path);

my $base_files = P4FileList->new("${p4_path}\@$base_changelist");
my $curr_files = P4FileList->new(
        $p4_path . ($curr_changelist eq "sandbox" ? "" : "\@$curr_changelist"));

# "sandbox" mode: 'current' is the depot listing without a changelist,
# overlaid with whatever is opened in the workspace.
if ($curr_changelist eq 'sandbox') {

    # first, drop baseline entries that have no counterpart in 'current'
    foreach my $f ($base_files->files()) {
        next if defined($curr_files->get($f));

        print("removing baseline $f\n") if $verbose;
        $base_files->remove($f);
    }

    # now drop 'current' entries that are not in the (pruned) baseline
    foreach my $f ($curr_files->files()) {
        next if defined($base_files->get($f));

        print("removing current $f\n") if $verbose;
        $curr_files->remove($f);
    }

    # now query the sandbox to see what is here...
    # (list-form open: no shell, the path is passed to p4 verbatim)
    my @cmd = ("p4", "opened", "$top_directory/...");

    open(my $opened, "-|", @cmd) or
        die("unable to execute '@cmd': $!");

    while (my $line = <$opened>) {
        chomp $line;
        $line =~ s/\015$//;    # drop a trailing CR left by CRLF line ends
        # P4File->new yields undef for lines it cannot parse; append()
        # ignores those.  The trailing 1 replaces the depot entry.
        $curr_files->append(P4File->new($line), 1);    # overwrite
    }

    # close() on a pipe returns false both for I/O errors ($! set) and for a
    # non-zero child status ($! is 0, details are in $?).
    if (!close($opened)) {
        die("unable to close 'p4 opened' pipe: $!\n") if $!;
        die("p4 opened died from signal ", ($? & 0x7F), "\n") if ($? & 0x7F);
        die("'p4 opened' exited with error ", ($? >> 8), "\n");
    }
}

# Classification of every path that survived filtering:
#   0 - in baseline, absent or deleted in current (reported as deleted)
#   1 - present in both
#   2 - only in current (reported as added)
# Entries that show up in the 'p4 diff' output are removed again later;
# whatever is left is reported from this table.
my %union;

foreach my $f ($base_files->files()) {
    my $base = $base_files->get($f);
    my $curr = $curr_files->get($f);

    # Any action containing 'delete' (e.g. 'move/delete') counts as gone -
    # the same test is applied to baseline and current.
    my $in_current = (defined($curr) && $curr->action() !~ /delete/) ? 1 : 0;

    if ($base->action() =~ /delete/ && !$in_current) {
        # deleted on both sides: nothing to report
        $curr_files->remove($f);
        print("skipping $f\n") if $verbose;
        next;
    }
    $union{$f} = $in_current;    # present or not?
}

# NOTE(review): a path that is only in 'current' and whose action is a
# delete is still classified as added here - confirm whether that is intended.
foreach my $f ($curr_files->files()) {
    next if (exists($union{$f}));
    $union{$f} = 2;
}

# Run 'p4 diff -du' and copy its output to STDOUT, rewriting the '---' file
# name from depot to sandbox form and dropping files rejected by the
# include/exclude filters.  Every file seen here is removed from %union.
my @diff_cmd = ("p4", "diff", "-du", "${top_directory}/...\@$base_changelist");
if ($curr_changelist ne 'sandbox') {
    push(@diff_cmd, "${top_directory}/...\@$curr_changelist");
}
my $diff_cmd = join(' ', @diff_cmd);    # for messages only
# list-form open: no shell, the paths are passed to p4 verbatim
open(my $diff, "-|", @diff_cmd) or
    die("unable to execute $diff_cmd: $!");

my $time =
    '[1-9]{1}[0-9]{3}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]*)?( .[0-9]+)?';
my $prev_filename;

my $skip = 0;    # true while inside the diff of a filtered-out file
while (my $line = <$diff>) {
    chomp $line;
    $line =~ s/\015$//;    # drop a trailing CR left by CRLF line ends

    # the 'diff' new/old file name line may be followed by a timestamp
    #   If so, remove it so our regexp matches more easily.
    # p4 and git diff outputs do not have the timestamp
    if ($line =~ /^[-+=]{3} \S.*(\s+$time)$/) {
        $line =~ s/\Q$1\E$//;
    }
    # NOTE(review): a removed/added content line whose text starts with
    # "-- " / "++ " also matches the header patterns below - confirm whether
    # hunk tracking is needed.
    if ($line =~ /^--- (.+)$/) {
        # P4 path to file:
        # --- <filename>
        my $p4entry = $1;
        $prev_filename = $p4entry;
        # The depot prefix is a literal path: quote it.  An empty prefix is
        # skipped because an empty pattern would reuse the last regexp.
        if (defined($depot_path) && length($depot_path)) {
            $prev_filename =~ s/\Q$depot_path\E/$sandbox_path/;
        }
        if ($curr_files->include_me($p4entry)) {
            # Filtered-out files were never stored in the file lists, so the
            # consistency check only makes sense for included ones.
            die("unexpected entry $p4entry") unless exists($union{$p4entry});
            delete $union{$p4entry};
            print("--- $prev_filename\n");
            $skip = 0;
        } else {
            $skip = 1;
        }
    } elsif ($line =~ /^\+\+\+ (.+)$/) {
        my $filename = $1;
        print("+++ $filename\n")
            unless ($skip);
        # the '+++' name must be the sandbox spelling of the '---' name
        die("unexpected '$filename' - expected '$prev_filename'")
            unless ($prev_filename eq $filename);
        $prev_filename = undef;
    } else {
        print("$line\n")
            unless $skip;
    }
}

# close() on a pipe returns false both for I/O errors ($! set) and for a
# non-zero child status ($! is 0, details are in $?).
if (!close($diff)) {
    die("unable to close '$diff_cmd' pipe: $!\n") if $!;
    die("'$diff_cmd' died from signal ", ($? & 0x7F), "\n") if ($? & 0x7F);
    die("'$diff_cmd' exited with error ", ($? >> 8), "\n");
}

# Close a pipe handle; die with a specific message on failure.
# close() on a pipe returns false both for I/O errors ($! set) and for a
# non-zero child status ($! is 0, details are in $?).
sub _close_pipe_or_die
{
    my ($handle, $what) = @_;

    return if close($handle);
    die("unable to close $what pipe: $!\n") if $!;
    die("$what died from signal ", ($? & 0x7F), "\n") if ($? & 0x7F);
    die("$what exited with error ", ($? >> 8), "\n");
}

# Read $handle to EOF; return its lines, chomped, each with $prefix prepended.
sub _read_prefixed_lines
{
    my ($handle, $prefix) = @_;

    my @lines;
    while (my $line = <$handle>) {
        chomp $line;
        push(@lines, $prefix . $line);
    }
    return @lines;
}

# Whatever is still in %union did not appear in the 'p4 diff' output:
# unchanged files, deleted files and added files.  Synthesize an entry for
# each.
foreach my $f (sort keys %union) {
    # excluded file have already been deleted from the file lists - no need to
    #  check again
    if ($union{$f} == 1) {
        # defined in both..
        next if defined($suppress_unchanged);
        my $curr = $curr_files->get($f);
        die("oops") unless defined($curr);
        my $name = $curr->name();
        # File names go in as %s arguments, never into the format itself:
        # Perforce spells special characters as %40, %23, ... in paths.
        printf("p4 diff %s#%d %s\n", $f, $curr->rev(), $name);
        printf("=== %s\n", $name);
    } elsif ($union{$f} == 0) {
        # present in baseline, absent in current: deleted
        my $base = $base_files->get($f);
        my $name = $base->name();
        printf("p4 diff %s#%d %s\n", $f, $base->rev(), $name);
        # %d: a non-numeric changelist (label) prints as 0
        printf("index %d..0\n", $base_changelist);
        printf("--- %s\n", $name);
        printf("+++ /dev/null\n");
        # p4 print -q $base->path() . '#' . $base->rev() |sed -e 's/^/-/'
        open(my $print, "-|", "p4", "print", "-q",
             $base->path() . '#' . $base->rev())
            or
            die("p4 print failed: $!\n");
        my @lines = _read_prefixed_lines($print, '-');
        _close_pipe_or_die($print, "p4 print");
        printf("@@ 1,%d 0,0 @@\n", scalar(@lines));
        printf("%s\n", join("\n", @lines));
    } elsif ($union{$f} == 2) {
        # added
        my $curr = $curr_files->get($f);
        my $name = $curr->name();
        printf("p4 diff %s#%d %s\n", $f, $curr->rev(), $name);
        printf("new file mode\n");
        # %d: a non-numeric changelist (label, "sandbox") prints as 0
        printf("index 0..%d\n", $curr_changelist);
        printf("--- /dev/null\n");
        printf("+++ %s\n", $name);

        my @lines;
        if (-f $name) {
            # the file is in the sandbox: read it directly
            my $err = "'$name'";
            open(my $in, '<', $name) or die("unable open $err: $!");
            @lines = _read_prefixed_lines($in, '+');
            # plain file: only $! is meaningful, $? is not set by this close
            close($in) or die("unable to close $err: $!\n");
        } else {
            # not on disk: fetch the revision from the server
            my $spec = $curr->path() . '#' . $curr->rev();
            my $err  = "'p4 print -q $spec'";
            open(my $print, '-|', "p4", "print", "-q", $spec) or
                die("open $err pipe failed: $!");
            @lines = _read_prefixed_lines($print, '+');
            _close_pipe_or_die($print, $err);
        }
        printf("@@ 0,0 1,%d @@\n", scalar(@lines));
        printf("%s\n", join("\n", @lines));
    }
}

exit 0;
