#!/usr/bin/env escript
%% -*- erlang -*-

%% %CopyrightBegin%
%%
%% SPDX-License-Identifier: Apache-2.0
%%
%% Copyright Ericsson AB 2024-2025. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%

%% Tool to generate fixes to the OTP SPDX file produced by ORT,
%% and tests to verify that given the scan-results generated by
%% ORT and the OTP SPDX, the OTP SPDX is compliant with SPDX 2.2
%%
%% Notice that ORT produces correct SPDX 2.2; this Erlang/OTP SPDX
%% needs to be further split into packages. This need comes from users
%% wanting to opt-out of some applications, and also to keep track
%% of which packages are vendor packages/files.
%%
%% Because of this need, it is easy to break the correct SPDX
%% simply by, e.g., adding a package name that contains underscores.
%% To prevent these issues, we have a test option that checks
%% (to some degree) that the generated SPDX is correct.
%%
%% After this validation, users of this script should still run
%% other validator tools, such as, ntia-conformance-checker.
%%

%%
%% REUSE-IgnoreStart
%%
%% Ignore copyright detection heuristics of REUSE tool in this file.
%% this is needed to avoid REUSE false positives on 'Copyright' variable name.
%%

-include_lib("kernel/include/file.hrl").

-export([test_project_name/1,
         test_name/1,
         test_creators_tooling/1,
         test_spdx_version/1]).

-export([test_minimum_apps/1, test_copyright_not_empty/1, test_filesAnalised/1,
         test_hasFiles_not_empty/1, test_homepage/1,
         test_licenseConcluded_exists/1, test_licenseDeclared_exists/1,
         test_licenseInfoFromFiles_not_empty/1, test_package_names/1,
         test_package_ids/1, test_verificationCode/1, test_supplier_Ericsson/1,
         test_originator_Ericsson/1, test_versionInfo_not_empty/1, test_package_hasFiles/1,
         test_project_purl/1, test_packages_purl/1, test_download_location/1, 
         test_package_relations/1, test_has_extracted_licenses/1,
         test_vendor_packages/1, test_erts/1%%,
         %% test_copyright_format/1, test_files_licenses/1,
        ]).

-define(default_classified_result, "scan-result-classified.json").
-define(default_scan_result, "scan-result.json").
-define(diff_classified_result, "scan-result-diff.json").
-define(erlang_license, ~"Apache-2.0").
-define(spdxref_project_name, ~"SPDXRef-Project-OTP").
-define(spdx_project_name, ~"Erlang/OTP").
-define(spdx_creators_tooling, ~"Tool: otp_compliance").
-define(spdx_supplier, ~"Organization: Ericsson AB").
-define(spdx_download_location, ~"https://github.com/erlang/otp/releases").
-define(spdx_homepage, ~"https://www.erlang.org").
-define(spdx_purl_meta_data, ~"?vcs_url=git+https://github.com/erlang/otp.git").
-define(spdx_version, ~"SPDX-2.2").
-define(otp_version, 'OTP_VERSION'). % file name of the OTP version
-define(spdx_project_purl, #{ ~"comment" => ~"",
                              ~"referenceCategory" => ~"PACKAGE-MANAGER",
                              ~"referenceLocator" => ~"pkg:github/erlang/otp",
                              ~"referenceType" => ~"purl"}).


%% Add more relations if necessary.
-type spdx_relations() :: #{ 'DOCUMENTATION_OF' => [],
                             'CONTAINS' => [],
                             'TEST_OF' => [],
                             'PACKAGE_OF' => []}.

-record(spdx_package, {'SPDXID'           :: unicode:chardata(),
                       'versionInfo'      :: unicode:chardata(),
                       'description'      :: unicode:chardata(),
                       'name'             :: unicode:chardata(),
                       'copyrightText'    :: unicode:chardata(),
                       'filesAnalyzed'    = false :: boolean(),
                       'hasFiles'         = [] :: [unicode:chardata()],
                       'purl'             = false :: false | unicode:chardata(),
                       'homepage'         :: unicode:chardata(),
                       'licenseConcluded' :: unicode:chardata(),
                       'licenseDeclared'  :: unicode:chardata(),
                       'licenseInfoFromFiles' = [] :: [unicode:chardata()],
                       'downloadLocation' = ~"https://github.com/erlang/otp/releases" :: unicode:chardata(),
                       'packageVerificationCode' :: #{ 'packageVerificationCodeValue' => unicode:chardata()},
                       'supplier' = ~"Organization: Ericsson AB" :: unicode:chardata(),
                       'relationships' = #{ 'DOCUMENTATION_OF' => [],
                                            'CONTAINS' => [],
                                            'TEST_OF' => [],
                                            'PACKAGE_OF' => []} :: spdx_relations()
                      }).
-type spdx_package() :: #spdx_package{}.

-record(app_info, { description  :: unicode:chardata(),
                    id           :: unicode:chardata(),
                    vsn          :: unicode:chardata(),

                    %% modules can only be included in one app.
                    %% not_loaded indicates a special handling of this module, e.g., erts.
                    modules      :: [atom()] | not_loaded,
                    applications :: [atom()],
                    included_applications :: [atom()],
                    optional_applications :: [atom()] }).

-type app_info() :: #app_info{}.

%%
%% Commands
%%
%% sbom
%%
%%    otp-info: given an oss-review-toolkit (ORT) scan result and a
%%              source SBOM, it populates the fields that ORT can't
%%              in Unmanaged projects.
%%
%% compliance   useful for CI/CD compliance checks.
%%
%%    detect:   given a scan-result from ORT, it detects files without license
%%              and writes them into disk.
%%
%%    check:    given a recent scan-result from ORT (possibly from PR), and an
%%              existing file with known files without licenses (from prev. commit),
%%              calculate if new files without licenses have been added to the repo.
%%
%% explore
%%
%%    classify: takes as input a scan of ort and returns a json file containing
%%              as keys the licenses and as values the files under those licenses.
%%
%%    diff:     performs a diff of existing classification file against
%%              other classification files. this is useful to guarantee that
%%              files that had license X had not unexpectedly been reported differently.
%%

%%
%% USE OF COMMANDS
%%
%% The commands `classify` and `diff` are useful for exploring the licenses.
%% ORT does not report in an easy way which files have been attached to which licenses,
%% unless one generates a report. At the time, we cannot generate an SBOM,
%% so we are in the dark.
%%
%% The commands `detect` and `check` can be used in CI/CD to
%% prevent entering new files with unknown license. In the normal case,
%% the `detect` command only needs to be issued once in the repo.
%% Once we keep track of this file, the command is not needed anymore,
%% as the list of files with no license should not grow, and only
%% the `check` command should be executed in the CI/CD.
%%
%%

main(Args) ->
    argparse:run(Args, cli(), #{progname => otp_compliance}).

cli() ->
    #{ commands =>
           #{"sbom" =>
                 #{ help => """
                            Contains useful commands to fix an ORT generated source SBOM.

                            """,
                   commands =>
                        #{"otp-info" =>
                              #{ help =>
                                     """
                                     Adds information missing in ORT's Erlang/OTP source SBOM
                                       - Add homepage
                                       - Fixes license of `*.beam` files
                                       - Fixes project name

                                     Example:

                                     > .github/scripts/otp-compliance.es sbom otp-info --sbom-file bom.spdx.json --input-file scan-result.json
                                     """,
                                 arguments => [ sbom_option(),
                                                write_to_file_option(),
                                                input_option() ],
                                 handler => fun sbom_otp/1},

                          "test-file" =>
                              #{ help =>
                                     """
                                     Verify that the produced SBOM satisfies some minimum requirements

                                     Example:

                                     > .github/scripts/otp-compliance.es sbom test-file --sbom-file otp.spdx.json
                                     """,
                                 arguments => [ sbom_option(), ntia_checker() ],
                                 handler => fun test_file/1},

                          "vendor" =>
                              #{ help =>
                                     """
                                     SBoM contains only vendor dependencies

                                     Example:

                                     > .github/scripts/otp-compliance.es sbom vendor --sbom-file otp.spdx.json
                                     """,
                                 arguments => [ sbom_option()],
                                 handler => fun sbom_vendor/1}
                         }},
             "explore" =>
                 #{  help => """
                            Explore license data.
                            Useful to figure out the mapping files-to-licenses.

                            """,
                    commands =>
                        #{"classify-license" =>
                              #{ help =>
                                     """
                                     Classify files by their license group.
                                       - Input file expects a scan-result from ORT.
                                       - Output file shows mapping between licenses and files.
                                         The output file can be fed to the `explore diff` command.

                                     """,
                                 arguments => [ input_option(?default_scan_result),
                                                output_option(?default_classified_result),
                                                apply_excludes(),
                                                apply_curations() ],
                                 handler => fun classify_license/1},
                          "classify-license-copyright" =>
                              #{ help =>
                                     """
                                     Pair files with their copyright and license.
                                     Depends on a `scan-result.json` and the output of the `classify-license`.

                                     """,
                                 arguments => [ input_option(?default_scan_result),
                                                base_file(?default_classified_result),
                                                output_option() ],
                                 handler => fun classify_path_license_copyright/1},

                          "diff" =>
                              #{ help =>
                                     """
                                     Compare against previous license results.
                                       - Input file should be the output of the `classify` command for input and base files.
                                       - Output returns a summary of additions and deletions per license.

                                     """,
                                 arguments => [ input_option(?default_classified_result),
                                                base_file(),
                                                output_option(?diff_classified_result) ],
                                 handler => fun diff/1}
                         }
                  },
             "compliance" =>
                 #{ help => """
                            Commands to enforce compliance policy towards unlicensed files.

                            """,
                    commands =>
                        #{"detect" =>
                              #{ help =>
                                     """
                                     Detects unlicensed files.
                                     - Input file expects a scan-result from ORT.
                                     - Output file is a list of files without license.
                                       The output file can be fed to the `compliance check` command.

                                     """,
                                 arguments => [ input_option(?default_scan_result),
                                                output_option(),
                                                apply_excludes() ],
                                 handler => fun detect_no_license/1},
                          "check" =>
                              #{ help =>
                                     """
                                     Checks that no new unlicensed files have been added.
                                     - Input file expects scan-result from ORT.
                                     - Base file expects output file from `no_license` command.

                                     """,
                                 arguments => [ input_option(?default_scan_result),
                                                base_file(),
                                                apply_excludes(),
                                                output_option() ],
                                 handler => fun check_no_license/1}}}}}.

%%
%% Options
%%
input_option() ->
    #{name => input_file,
      type => binary,
      long => "-input-file"}.


input_option(Default) ->
    (input_option())#{default => Default}.

sbom_option() ->
    #{name => sbom_file,
      type => binary,
      default => "bom.spdx.json",
      long => "-sbom-file"}.

ntia_checker() ->
    #{name => ntia_checker,
      type => boolean,
      default => true,
      long => "-ntia-checker"}.

write_to_file_option() ->
    #{name => write_to_file,
      type => binary,
      default => true,
      long => "-write_to_file"}.

output_option(Default) ->
    #{name => output_file,
      type => binary,
      default => Default,
      long => "-output-file"}.

output_option() ->
    #{name => output_file,
      type => binary,
      required => true,
      long => "-output-file"}.

apply_excludes() ->
    #{name => exclude,
      type => boolean,
      short => $e,
      default => true,
      long => "-apply-excludes"}.

apply_curations() ->
    #{name => curations,
      type => boolean,
      short => $c,
      default => true,
      long => "-apply-curations"}.

base_file() ->
    #{name => base_file,
      type => binary,
      long => "-base-file"}.
base_file(DefaultFile) ->
    #{name => base_file,
      type => binary,
      default => DefaultFile,
      long => "-base-file"}.


%%
%% Commands
%%

sbom_vendor(#{sbom_file  := SbomFile}) ->
    Sbom = decode(SbomFile),
    Spdx = get_vendor_dependencies(Sbom),
    file:write_file(SbomFile, json:format(Spdx)).

get_vendor_dependencies(#{~"packages" := Packages}=Spdx) ->
    AppPackages = create_otp_app_packages(Spdx),
    VendorPackages = create_otp_vendor_packages(Spdx),

    VendorPackageIds = lists:map(fun (#{~"SPDXID" := Id}) -> Id end, VendorPackages),
    OTPPackageIds = lists:map(fun (#{~"SPDXID" := Id}) -> Id end, AppPackages),
    Packages1 = lists:filter(fun (#{~"SPDXID" := Id}) ->
                                     lists:member(Id, VendorPackageIds) andalso not lists:member(Id, OTPPackageIds)
                             end, Packages),
    Spdx#{~"packages" := Packages1}.


sbom_otp(#{sbom_file  := SbomFile, write_to_file := Write, input_file := Input}) ->
    Sbom = decode(SbomFile),
    ScanResults = decode(Input),
    Spdx = improve_sbom_with_info(Sbom, ScanResults),
    case Write of
        true ->
            file:write_file(SbomFile, json:format(Spdx));
            %% Should we not overwritte the given file?
            %% file:write_file("otp.spdx.json", json:format(Spdx));
        false ->
            {ok, Spdx}
    end.

-spec improve_sbom_with_info(Sbom :: map(), ScanResults :: map()) -> Result :: map().
improve_sbom_with_info(Sbom, ScanResults) ->
    FixFuns = sbom_fixing_functions(ScanResults),
    Spdx = lists:foldl(fun ({Fun, Data}, Acc) -> Fun(Data, Acc) end, Sbom, FixFuns),
    package_by_app(Spdx).

sbom_fixing_functions(ScanResults) ->
    Licenses = path_to_license(ScanResults),
    Copyrights = path_to_copyright(ScanResults),
    [{fun fix_project_name/2, ?spdxref_project_name},
     {fun fix_name/2, ?spdx_project_name},
     {fun fix_creators_tooling/2, {?spdx_creators_tooling, ScanResults}},
     {fun fix_supplier/2, ?spdx_supplier},
     {fun fix_download_location/2, ?spdx_download_location},
     {fun fix_project_package_license/2, {Licenses, Copyrights}},
     {fun fix_project_package_version/2, 'OTP_VERSION'},
     {fun fix_has_extracted_license_info/2, extracted_license_info()},
     {fun fix_project_purl/2, ?spdx_project_purl},
     {fun fix_beam_licenses/2, {Licenses, Copyrights}} ].

fix_project_name(ProjectName, #{ ~"documentDescribes" := [ ProjectName0 ],
                                 ~"packages" := Packages}=Sbom) ->
    Packages1 = [begin
                     case maps:get(~"SPDXID", Package) of
                         ProjectName0 ->
                             Package#{~"SPDXID" := ProjectName};
                         _ ->
                             Package
                     end
                 end || Package <- Packages],
    Sbom#{ ~"documentDescribes" := [ ProjectName ], ~"packages" := Packages1}.

fix_name(Name, Sbom) ->
    Sbom#{ ~"name" => Name}.

fix_creators_tooling({Tool, #{~"repository" := #{~"vcs_processed" := #{~"revision" := Version}}}},
                      #{ ~"creationInfo" := #{~"creators" := [ORT | _]}=Creators}=Sbom) ->
    SHA = string:trim(<<".sha.", Version/binary>>),
    Sbom#{~"creationInfo" := Creators#{ ~"creators" := [ORT, <<Tool/binary, SHA/binary>>]}}.

fix_supplier(_Name, #{~"packages" := [ ] }=Sbom) ->
    io:format("[warn] no packages available!~n"),
    Sbom;
fix_supplier(Name, #{~"packages" := [_ | _]=Packages }=Sbom) ->
    Sbom#{~"packages" := [maps:update_with(~"supplier", fun(_) -> Name end, Name, Package) || Package <- Packages]}.

fix_download_location(_Url, #{~"packages" := [ ] }=Sbom) ->
    io:format("[warn] no packages available!~n"),
    Sbom;
fix_download_location(Url, #{~"packages" := [ _ | _ ]=Packages }=Sbom) ->
    PackagesUpdated = [ Package#{~"downloadLocation" := Url } || Package <- Packages],
    Sbom#{~"packages" := PackagesUpdated}.

fix_project_package_license(_, #{ ~"documentDescribes" := [RootProject],
                                  ~"packages" := Packages}=Spdx) ->
    Packages1= [case maps:get(~"SPDXID", Package) of
                    RootProject ->
                        Licenses = remove_invalid_spdx_licenses(maps:get(~"licenseDeclared", Package)),
                        Package#{ ~"homepage" := ~"https://www.erlang.org",
                                  ~"licenseConcluded" := binary:join(Licenses, ~" AND ")};
                    _ ->
                        Package
                end || Package <- Packages],
    Spdx#{~"packages" := Packages1}.

remove_invalid_spdx_licenses(Licenses) when is_list(Licenses) ->
    lists:foldl(fun (L, Acc) ->
                        remove_invalid_spdx_licenses(L) ++ Acc
                end, [], Licenses);
remove_invalid_spdx_licenses(Licenses) when is_binary(Licenses) ->
    lists:filter(fun (~"NONE") -> false;
                     (~"NOASSERTION") -> false;
                     (_) -> true
                 end, string:split(Licenses, ~" AND ", all)).

fix_project_package_version(_, #{ ~"documentDescribes" := [RootProject],
                                  ~"packages" := Packages}=Spdx) ->
    OtpVersion = get_otp_version(),
    Packages1= [case maps:get(~"SPDXID", Package) of
                    RootProject ->
                        Package#{ ~"versionInfo" := OtpVersion };
                    _ ->
                        Package
                end || Package <- Packages],
    Spdx#{~"packages" := Packages1}.

get_otp_version() ->
    {ok, Content} = file:read_file(?otp_version),
    string:trim(Content).

fix_project_purl(#{~"referenceLocator" := RefLoc}=Purl, #{ ~"documentDescribes" := [RootProject],
                          ~"packages" := Packages}=Spdx) ->
    Packages1= [case maps:get(~"SPDXID", Package) of
                    RootProject ->
                        VersionInfo = maps:get(~"versionInfo", Package),
                        Purl1 = Purl#{~"referenceLocator" := <<RefLoc/binary, "@", VersionInfo/binary>>},
                        Package#{ ~"externalRefs" => [Purl1]};
                    _ ->
                        Package
                end || Package <- Packages],
    Spdx#{~"packages" := Packages1}.

otp_purl(Name, VersionInfo) ->
    Metadata = ?spdx_purl_meta_data,
    <<"pkg:otp/", Name/binary, "@", VersionInfo/binary, Metadata/binary>>.

fix_has_extracted_license_info(MissingLicenses, #{~"hasExtractedLicensingInfos" := LicenseInfos,
                                                   ~"packages" := Packages,
                                                  ~"documentDescribes" := [RootProject]}=Spdx) ->
    ExtractedLicenses = [maps:get(~"licenseId", ExtractedLicense) || ExtractedLicense <- LicenseInfos ],
    MissingExtractedLicenses =
        lists:foldl(fun (Package, Acc) ->
                            case maps:get(~"SPDXID", Package) of
                                RootProject ->
                                    %% list of SPDX identifier
                                    InfoFromFiles = maps:get(~"licenseInfoFromFiles", Package),

                                    %% Licenses is a list of tuples (Spdx Id, License Text)
                                    Licenses = lists:filter(
                                                 fun (License) ->
                                                         %% License must be used, and not already extracted.
                                                         %% this only makes sense for LicenseRef-XXXX
                                                         lists:member(element(1, License), InfoFromFiles) andalso
                                                             not lists:member(element(1, License), ExtractedLicenses)
                                                 end, MissingLicenses),
                                    Licenses ++ Acc;
                              _ ->
                                  Acc
                          end
                  end, [], Packages),
    AddAllExtractedLicenses = lists:foldl(fun ({K, V}, Acc) ->
                                                  [#{~"extractedText" => V, ~"licenseId" => K} | Acc]
                                          end, LicenseInfos, MissingExtractedLicenses),
    Spdx#{~"hasExtractedLicensingInfos" := AddAllExtractedLicenses}.

-spec create_externalRef_purl(Desc :: binary(), Purl :: binary()) -> map().
create_externalRef_purl(Description, Purl) ->
    #{ ~"comment" => Description,
       ~"referenceCategory" => ~"PACKAGE-MANAGER",
       ~"referenceLocator" => Purl,
       ~"referenceType" => ~"purl"}.

%% re-populate licenses to .beam files from their .erl files
%% e.g., the lists.beam file should have the same license as lists.erl
fix_beam_licenses(_LicensesAndCopyrights, #{ ~"packages" := []}=Sbom) ->
    io:format("[warn] no packages available!~n"),
    Sbom;
fix_beam_licenses(LicensesAndCopyrights,
                  #{ ~"files"   := Files}=Sbom) ->

    Files1= lists:map(
              fun (SPDX) ->
                      %% Adds license and copyright from .erl or .hrl file to its .beam equivalent
                      case SPDX of
                          #{~"fileName" := <<"lib/stdlib/uc_spec/", _Filename/binary>>,
                            ~"licenseInfoInFiles" := [License]}  when License =/= ~"NONE", License =/= ~"NOASSERTION"->
                              files_have_no_license(SPDX#{~"licenseConcluded" := License});

                          #{~"fileName" := ~"bootstrap/lib/stdlib/ebin/erl_parse.beam"} ->
                              %% beam file auto-generated from grammar file
                              Spdx1 = fix_beam_spdx_license(~"lib/stdlib/src/erl_parse.yrl", LicensesAndCopyrights, SPDX),
                              Spdx2 = files_have_no_license(Spdx1),
                              add_license_comment(Spdx2);

                          #{~"fileName" := ~"bootstrap/lib/stdlib/ebin/unicode_util.beam"} ->
                              %% follows from otp/lib/stdlib/uc_spec/README-UPDATE.txt
                              Spdx1 = files_have_no_license(SPDX#{~"licenseConcluded" := ~"Unicode-3.0 AND Apache-2.0"}),
                              add_license_comment(Spdx1);

                          #{~"fileName" := Filename} when
                                Filename =:= ~"erts/emulator/zstd/COPYING";
                                Filename =:= ~"erts/emulator/zstd/LICENSE";
                                Filename =:= ~"erts/emulator/ryu/LICENSE-Apache2";
                                Filename =:= ~"erts/emulator/ryu/LICENSE-Boost";
                                Filename =:= ~"lib/eldap/LICENSE";
                                Filename =:= ~"erts/lib_src/yielding_c_fun/test/examples/sha256_erlang_nif/c_src/sha-2/LICENSE";
                                Filename =:= ~"erts/lib_src/yielding_c_fun/test/examples/sha256_erlang_nif/LICENSE" ->
                              %% license files have comment stating they are license files.
                              SPDX#{~"comment" => ~"license file"};

                          #{~"fileName" := <<"FILE-HEADERS/", Filename/binary>>} when Filename =/= ~"README.md" ->
                              %% license files have comment stating they are license files.
                              %% this cannot be encoded in .ort.yml as it does not allow to add comments
                              SPDX#{~"comment" => ~"license file"};  % TODO: remove this later ~"licenseInfoInFiles" := [~"NOASSERTION"]};

                          #{~"fileName" := <<"LICENSES/", _Filename/binary>>} ->
                              %% license files have comment stating they are license files.
                              SPDX#{~"comment" => ~"license file", ~"licenseInfoInFiles" := [~"NOASSERTION"]};

                          #{~"fileName" := Filename} ->
                              case bootstrap_mappings(Filename) of
                                  {error, not_beam_file} ->
                                      fix_spdx_license(SPDX);
                                  {Path, Filename1} ->
                                      case binary:split(Filename1, ~".beam") of
                                          [File, _] ->
                                              Spdx1 = fix_beam_spdx_license(Path, File, LicensesAndCopyrights, SPDX),
                                              Spdx2 = files_have_no_license(Spdx1),
                                              add_license_comment(Spdx2);
                                          _ ->
                                              fix_spdx_license(SPDX)
                                      end
                              end
                          end
              end, Files),
    Sbom#{ ~"files" := Files1}.

bootstrap_mappings(<<"bootstrap/lib/compiler/ebin/", Filename/binary>>) -> {~"lib/compiler/src/", Filename};
bootstrap_mappings(<<"bootstrap/lib/kernel/ebin/",Filename/binary>>) -> {<<"lib/kernel/src/">>, Filename};
bootstrap_mappings(<<"bootstrap/lib/kernel/include/",Filename/binary>>) -> {<<"lib/kernel/include/">>, Filename};
bootstrap_mappings(<<"bootstrap/lib/stdlib/ebin/",Filename/binary>>) -> {<<"lib/stdlib/src/">>, Filename};
bootstrap_mappings(<<"erts/preloaded/ebin/",Filename/binary>>) -> {<<"erts/preloaded/src/">>, Filename};
bootstrap_mappings(_Other) ->
    {error, not_beam_file}.


%% fixes spdx license of beam files
fix_beam_spdx_license(Path, {Licenses, Copyrights}, SPDX) ->
    License = maps:get(Path, Licenses, ~"NOASSERTION"),
    Copyright = maps:get(Path, Copyrights, ~"NOASSERTION"),
    fix_spdx_license(SPDX#{ ~"copyrightText" := Copyright, ~"licenseConcluded" := License }).

fix_beam_spdx_license(Path, File, LicensesAndCopyrights, SPDX) when is_binary(Path),
                                                                    is_binary(File) ->
    Spdx0 = fix_beam_spdx_license(<<Path/binary, File/binary, ".erl">>, LicensesAndCopyrights, SPDX),
    case maps:get(~"licenseConcluded", Spdx0) of
        ~"NOASSERTION" ->
            fix_beam_spdx_license(<<Path/binary, File/binary, ".hrl">>, LicensesAndCopyrights, Spdx0);
        _ ->
            Spdx0
    end.

files_have_no_license(Spdx) ->
    Spdx#{~"licenseInfoInFiles" := [~"NONE"]}.

none_to_noassertion(~"NONE") ->
    ~"NOASSERTION";
none_to_noassertion(X) ->
    X.

add_license_comment(#{~"licenseConcluded" := Concluded,
                     ~"licenseInfoInFiles" := [License]}=Spdx)
  when (Concluded =:= ~"NOASSERTION" orelse Concluded =:= ~"NONE") andalso License =/= Concluded ->
    Spdx#{~"licenseComments" => ~"BEAM files preserve their *.erl license"};
add_license_comment(Spdx) ->
    Spdx.


%% fixes spdx license of non-beam files
fix_spdx_license(#{~"licenseInfoInFiles" := [LicenseInFile],
                   ~"licenseConcluded" := License,
                   ~"copyrightText" := C}=SPDX) ->
    License1 = case License of
                   ~"NONE" -> LicenseInFile;
                   ~"NOASSERTION" -> LicenseInFile;
                   Other -> Other
               end,
    ConcludedLicense = none_to_noassertion(License1),
    SPDX#{ ~"licenseConcluded" := ConcludedLicense,
           ~"copyrightText" := none_to_noassertion(C) };
fix_spdx_license(#{~"licenseInfoInFiles" := Licenses}=SPDX) when length(Licenses) > 1 ->
    Licenses1 = lists:map(fun erlang:binary_to_list/1, Licenses),
    LicensesBin = erlang:list_to_binary(lists:join(" AND ", Licenses1)),
    fix_spdx_license(SPDX#{ ~"licenseInfoInFiles" := [LicensesBin] });
fix_spdx_license(#{~"copyrightText" := C}=SPDX) ->
    SPDX#{ ~"copyrightText" := none_to_noassertion(C)}.

%% Given an input file, returns a mapping of
%% #{filepath => license} for each file path towards its license.
-spec path_to_license(Input :: map()) -> #{Path :: binary() => License :: binary()}.
path_to_license(Input) ->
    match_path_to(Input, fun group_by_licenses/3).

-spec path_to_copyright(Input :: map()) -> #{Path :: binary() => License :: binary()}.
path_to_copyright(Input) ->
    match_path_to(Input, fun group_by_copyrights/3).

%% Inverts the #{Key => [Path]} grouping produced by GroupFun into #{Path => Key}.
%% GroupFun is always called with excludes enabled (`true`) and curations
%% disabled (`false`).
-spec match_path_to(Input :: map(), GroupFun :: fun()) -> #{ Path :: binary() => Result :: binary() }.
match_path_to(Json, GroupFun) ->
    Grouped = GroupFun(Json, true, false),
    %% entries already present in the accumulator win over the new ones
    Invert = fun (Key, Paths, PathMap) ->
                     maps:merge(maps:from_keys(Paths, Key), PathMap)
             end,
    maps:fold(Invert, #{}, Grouped).

%%
%% Explore command
%%
%% Groups the file paths of the scan result in `input_file` by license and
%% writes the grouping as JSON to `output_file`.
%% `exclude` and `curations` toggle the use of excludes and curations.
classify_license(#{input_file := Filename,
                   output_file := Output,
                   exclude := ApplyExclude,
                   curations := ApplyCuration}) ->
    Grouped = group_by_licenses(decode(Filename), ApplyExclude, ApplyCuration),
    ok = file:write_file(Output, json:encode(Grouped)).

%% Writes to `output_file` a JSON object that maps every path found in either
%% the copyright findings of `input_file` or the license grouping of
%% `base_file` to #{"Copyright" => ..., "License" => ...}.
%% A missing side is reported as "NONE".
classify_path_license_copyright(#{output_file := Output,
                     input_file := Filename,
                     base_file  := LicenseFileGroup}) ->
    Copyrights = classify_copyright_result(Filename),
    Licenses = expand_license_result(LicenseFileGroup),
    Paths = lists:sort(lists:uniq(maps:keys(Copyrights) ++ maps:keys(Licenses))),
    Report = maps:from_list(
               [{Path, #{ ~"Copyright" => maps:get(Path, Copyrights, ~"NONE"),
                          ~"License" => maps:get(Path, Licenses, ~"NONE")}}
                || Path <- Paths]),
    ok = file:write_file(Output, json:encode(Report)).

%% Reads a JSON file of the shape #{License => [Path]} and flattens it
%% into #{Path => License}.
expand_license_result(Filename) ->
    ByLicense = decode(Filename),
    maps:fold(fun (License, Paths, PathToLicense) ->
                      lists:foldl(fun (Path, M) -> M#{Path => License} end,
                                  PathToLicense, Paths)
              end, #{}, ByLicense).

%% Reads the scan result in Filename and returns #{Path => Statement} built
%% from its copyright findings. When a path has several findings, the last
%% one in the list is kept.
classify_copyright_result(Filename) ->
    Findings = copyrights(scan_results(decode(Filename))),
    AddFinding =
        fun (Finding, PathToStatement) ->
                #{<<"statement">> := Statement, <<"location">> := Location} = Finding,
                #{<<"path">> := Path, <<"start_line">> := _, <<"end_line">> := _} = Location,
                PathToStatement#{Path => Statement}
        end,
    lists:foldl(AddFinding, #{}, Findings).

%% Groups the paths of all license findings in Json by license name.
%% ApplyExclude / ApplyCuration decide whether the excludes and curations
%% configured in Json are taken into account.
-spec group_by_licenses(map(), boolean(), boolean()) -> #{License :: binary() => [Path :: binary()]}.
group_by_licenses(Json, ApplyExclude, ApplyCuration) ->
    Excludes = apply_excludes(Json, ApplyExclude),
    Curations = apply_curations(Json, ApplyCuration),
    Collect = fun (Finding, Grouped) ->
                      group_by_license(Excludes, Curations, Finding, Grouped)
              end,
    lists:foldl(Collect, #{}, licenses(scan_results(Json))).

%% Groups the paths of all copyright findings in Json by copyright statement.
%% The third argument exists for symmetry with group_by_licenses/3 and is unused.
group_by_copyrights(Json, ApplyExclude, _ApplyCuration) ->
    Excludes = apply_excludes(Json, ApplyExclude),
    Collect = fun (Finding, Grouped) ->
                      group_by_copyright(Excludes, Finding, Grouped)
              end,
    lists:foldl(Collect, #{}, copyrights(scan_results(Json))).


%% Returns the exclude patterns of Json converted to regexes when
%% ApplyExclude is `true`, and [] when it is `false`.
apply_excludes(Json, ApplyExclude) ->
    ComputeExcludes = fun () -> convert_excludes(excludes(Json)) end,
    onlyif([], ApplyExclude, ComputeExcludes).

%% Returns the license curations of Json when ApplyCuration is `true`,
%% and [] when it is `false`.
apply_curations(Json, ApplyCuration) ->
    ComputeCurations = fun () -> curations(Json) end,
    onlyif([], ApplyCuration, ComputeCurations).

%% Compares two JSON files of the shape #{Key => [Value]} and writes, for every
%% key present in either file, the values only found in `input_file`
%% (additions) and only found in `base_file` (deletions) to `output_file`.
%%
%% Crashes with a badmatch if the output cannot be written, in line with
%% classify_license/1, instead of handing the error tuple back to a caller
%% that may ignore it.
diff(#{input_file := InputFile, base_file := BaseFile, output_file := Output}) ->
    Input = decode(InputFile),
    Base = decode(BaseFile),
    %% a set, so that keys present in both files are visited only once
    KeySet = sets:from_list(maps:keys(Input) ++ maps:keys(Base)),
    Data = sets:fold(fun(Key, Acc) -> set_difference(Key, Input, Base, Acc) end, #{}, KeySet),
    ok = file:write_file(Output, json:encode(Data)).

%% Writes to `output_file` the sorted JSON list of scanned files in
%% `input_file` that have no license finding. `exclude` toggles whether the
%% excludes configured in the input are applied.
%%
%% Crashes with a badmatch if the output cannot be written, in line with
%% classify_license/1, instead of handing the error tuple back to a caller
%% that may ignore it.
detect_no_license(#{input_file := InputFile,
                    output_file := OutputFile,
                    exclude := ApplyExcludes}) ->
    Input = decode(InputFile),
    SortedResult = compute_unlicense_files(Input, ApplyExcludes),
    ok = file:write_file(OutputFile, json:encode(SortedResult)).

%% Returns the sorted list of scanned file paths that have no license finding.
%% When ApplyExcludes is `true`, paths matching one of the excludes configured
%% in Input are left out.
compute_unlicense_files(Input, ApplyExcludes) ->
    LicensedPaths =
        sets:from_list(
          lists:map(fun (#{<<"location">> := #{<<"path">> := Path}}) -> Path end,
                    licenses(scan_results(Input)))),

    %% All scanned files, including those without license
    ScannedPaths =
        sets:from_list(
          lists:map(fun (#{<<"path">> := Path}) -> Path end,
                    files_from_scanner(Input))),

    Unlicensed = sets:to_list(sets:subtract(ScannedPaths, LicensedPaths)),

    %% Excluded files are ignored (only when requested)
    Excludes = excludes(Input),
    ExcludeRegex = onlyif([], ApplyExcludes, fun () -> convert_excludes(Excludes) end),
    lists:sort([Path || Path <- Unlicensed, not exclude_path(Path, ExcludeRegex)]).

%% Computes the files without license in `input_file` and writes to
%% `output_file` the JSON list of those that are not already listed in
%% `base_file` (a JSON list of known unlicensed paths).
%%
%% Crashes with a badmatch if the output cannot be written, in line with
%% classify_license/1, instead of handing the error tuple back to a caller
%% that may ignore it.
check_no_license(#{input_file := InputFile,
                   base_file := BaseFile,
                   exclude := ApplyExcludes,
                   output_file := OutputFile}) ->
    UnlicenseNew = compute_unlicense_files(decode(InputFile), ApplyExcludes),
    Unlicense = decode(BaseFile),
    UnlicenseSet = sets:from_list(Unlicense),
    UnlicenseNewSet = sets:from_list(UnlicenseNew),
    Result = sets:to_list(sets:subtract(UnlicenseNewSet, UnlicenseSet)),
    ok = file:write_file(OutputFile, json:encode(Result)).


%%
%% Helper functions
%%

%% Returns the exclude path entries found under
%% repository -> config -> excludes -> paths, or [] when the input
%% does not have that shape.
excludes(#{<<"repository">> :=
               #{<<"config">> :=
                     #{<<"excludes">> := #{<<"paths">> := Excludes}}}}) ->
    Excludes;
excludes(_Input) ->
    [].


%% Returns the license finding curations found under
%% repository -> config -> curations -> license_findings.
%% Crashes with a badmatch when the input does not have that shape.
curations(Input) ->
    #{<<"repository">> := Repository} = Input,
    #{<<"config">> := Config} = Repository,
    #{<<"curations">> := #{<<"license_findings">> := Findings}} = Config,
    Findings.

%% Returns the first entry of scanner -> scan_results with its summary
%% extended by the licenses and copyrights of all the remaining entries.
%% The findings of later entries are placed in front of the earlier ones.
scan_results(Input) ->
    #{<<"scanner">> := #{<<"scan_results">> := ScanResults}} = Input,
    First = hd(ScanResults),
    Merge =
        fun (#{ ~"summary" := #{ ~"licenses" := Licenses, ~"copyrights" := Copyrights}}, Summary) ->
                #{ ~"licenses" := SeenLicenses, ~"copyrights" := SeenCopyrights} = Summary,
                Summary#{ ~"licenses" := Licenses ++ SeenLicenses,
                          ~"copyrights" := Copyrights ++ SeenCopyrights }
        end,
    Merged = lists:foldl(Merge, maps:get(~"summary", First), tl(ScanResults)),
    First#{ ~"summary" := Merged }.

%% Returns the license findings stored under summary -> licenses.
licenses(Input) ->
    #{<<"summary">> := Summary} = Input,
    #{<<"licenses">> := Findings} = Summary,
    Findings.

%% Returns the copyright findings stored under summary -> copyrights.
copyrights(Input) ->
    #{<<"summary">> := Summary} = Input,
    #{<<"copyrights">> := Findings} = Summary,
    Findings.


%% Returns the file entries of the single element stored under
%% scanner -> files. Crashes with a badmatch when that list does not
%% contain exactly one element.
files_from_scanner(Input) ->
    #{<<"scanner">> := Scanner} = Input,
    #{<<"files">> := [#{<<"files">> := Files}]} = Scanner,
    Files.

%% Stores under Key in Acc which values of Key are only in Input (`addition`)
%% and which are only in Base (`deletions`). A key missing from either map
%% counts as having no values.
set_difference(Key, Input, Base, Acc) ->
    ValuesOf = fun (Map) -> sets:from_list(maps:get(Key, Map, [])) end,
    InputValues = ValuesOf(Input),
    BaseValues = ValuesOf(Base),
    Added = sets:to_list(sets:subtract(InputValues, BaseValues)),
    Removed = sets:to_list(sets:subtract(BaseValues, InputValues)),
    Acc#{Key => #{addition => Added, deletions => Removed}}.

%% Returns Default without calling Command when the flag is `false`;
%% calls Command and returns its result when the flag is `true`.
onlyif(Default, false, _Command) ->
    Default;
onlyif(_Default, true, Command) ->
    Command().

%% Reads Filename and decodes its contents as JSON.
%% Crashes with a badmatch when the file cannot be read.
decode(Filename) ->
    {ok, Contents} = file:read_file(Filename),
    json:decode(Contents).

%% Reads Filename, blanks every line that starts with "//" (comment lines,
%% which JSON does not allow) and decodes the remaining text as JSON.
%% Line breaks are dropped when the lines are put back together.
decode_without_spdx_license(Filename) ->
    {ok, Contents} = file:read_file(Filename),
    Uncommented = [re:replace(Line, "^//.*", "", [global])
                   || Line <- string:split(Contents, "\n", all)],
    json:decode(erlang:iolist_to_binary(Uncommented)).

%% Adds the path of one license finding to Acc (#{License => [Path]}).
%% Findings whose path matches an exclude regex are skipped; the license name
%% is replaced by the curated one when a curation exists for the path.
%% A path is listed at most once per license.
group_by_license(ExcludeRegexes, Curations, License, Acc) ->
    #{<<"license">> := LicenseName, <<"location">> := Location} = License,
    #{<<"path">> := Path, <<"start_line">> := _, <<"end_line">> := _} = Location,
    case exclude_path(Path, ExcludeRegexes) of
        false ->
            Curated = curated_path_license(LicenseName, Path, Curations),
            Known = maps:get(Curated, Acc, []),
            case lists:member(Path, Known) of
                true -> Acc#{Curated => Known};
                false -> Acc#{Curated => [Path | Known]}
            end;
        _ ->
            Acc
    end.

%% Adds the path of one copyright finding to Acc (#{Statement => [Path]}).
%% Findings whose path matches an exclude regex are skipped.
%% A path is listed at most once per statement.
group_by_copyright(ExcludeRegexes, Copyright, Acc) ->
    #{<<"statement">> := Statement, <<"location">> := Location} = Copyright,
    #{<<"path">> := Path, <<"start_line">> := _, <<"end_line">> := _} = Location,
    case exclude_path(Path, ExcludeRegexes) of
        false ->
            Known = maps:get(Statement, Acc, []),
            case lists:member(Path, Known) of
                true -> Acc#{Statement => Known};
                false -> Acc#{Statement => [Path | Known]}
            end;
        _ ->
            Acc
    end.

%% Turns the `pattern` of every exclude entry into a regular expression:
%% every "." is escaped and every "**" becomes ".*".
%% NOTE(review): a single "*" is left as is - confirm that is intended.
convert_excludes(Excludes) ->
    ToRegex =
        fun (#{<<"pattern">> := Pattern}) ->
                %% dots first, so that the ".*" inserted below is not escaped
                Escaped = re:replace(Pattern, <<"\\.">>, <<"\\\\.">>, [global, {return, binary}]),
                re:replace(Escaped, <<"\\*\\*">>, <<".*">>, [global, {return, binary}])
        end,
    lists:map(ToRegex, Excludes).

%% Tells whether Path matches at least one of the given regular expressions.
%% An empty list of expressions never excludes anything.
exclude_path(Path, ExcludeRegexes) ->
    Matches = fun (Regex) ->
                      case re:run(Path, Regex) of
                          {match, _} -> true;
                          _ -> false
                      end
              end,
    lists:any(Matches, ExcludeRegexes).

%% Returns the `concluded_license` of the first curation whose `path` is
%% exactly Path, or Name when no curation applies to Path.
curated_path_license(Name, Path, Curations) ->
    AppliesToPath = fun (#{<<"path">> := CuratedPath}) -> CuratedPath =:= Path;
                        (_) -> false
                    end,
    case lists:search(AppliesToPath, Curations) of
        {value, Curation} -> maps:get(<<"concluded_license">>, Curation);
        false -> Name
    end.

%% Splits the Spdx document into one package per OTP application and adds
%% the vendor dependencies as packages of their own.
package_by_app(Spdx) ->
    %% application packages, e.g., stdlib, erts, ssh, ssl
    Templates = generate_spdx_mappings(find_app_src_files(".")),
    PackageRecords = generate_spdx_packages(Templates, Spdx),
    AppPackages = [create_spdx_package(Record) || Record <- PackageRecords],
    WithApps = add_packages(AppPackages, Spdx),
    WithRelations = create_otp_relationships(PackageRecords, Templates, WithApps),

    %% vendor packages; the ones already present in the document are dropped
    AllVendorPackages = create_otp_vendor_packages(WithRelations),
    {VendorPackages, Deduplicated} = remove_duplicate_packages(AllVendorPackages, WithRelations),

    WithVendors = add_packages(VendorPackages, Deduplicated),
    create_vendor_relations(VendorPackages, WithVendors).

%% Returns the SPDX packages (as maps) of the OTP applications found from
%% the current directory.
create_otp_app_packages(Spdx) ->
    Templates = generate_spdx_mappings(find_app_src_files(".")),
    [create_spdx_package(Record) || Record <- generate_spdx_packages(Templates, Spdx)].

%% Returns the SPDX packages described by the vendor.info files found from
%% the current directory.
create_otp_vendor_packages(Spdx) ->
    VendorInfo = generate_vendor_info_package(find_vendor_src_files(".")),
    generate_spdx_vendor_packages(VendorInfo, Spdx).

%% Adds, in this order, the package relationships, the DEPENDS_ON
%% relationships and the optional dependency relationships to Spdx.
create_otp_relationships(Packages, PackageTemplates, Spdx) ->
    Steps = [fun (Doc) -> create_package_relationships(Packages, Doc) end,
             fun (Doc) -> create_depends_on_relationships(PackageTemplates, Doc) end,
             fun (Doc) -> create_opt_depency_relationships(PackageTemplates, Doc) end],
    lists:foldl(fun (Step, Doc) -> Step(Doc) end, Spdx, Steps).

%% Appends AppPackages to the packages of Spdx, after removing their files
%% from the project package (see remove_package_files_from_project/2).
%%
%% The packages are SPDX packages in map form (they are matched on their
%% `hasFiles` key), not `spdx_package()` records.
-spec add_packages(Packages :: [map()], Spdx :: map()) -> SpdxResult :: map().
add_packages(AppPackages, Spdx) ->
    #{~"packages" := SpdxPackages}=Spdx1 = remove_package_files_from_project(Spdx, AppPackages),
    Spdx1#{~"packages" := SpdxPackages ++ AppPackages}.

%% Removes vendor packages that already exist in the SPDX document.
%%
%% A vendor package whose SPDXID is already used by a package of the document
%% is not returned; instead " vendor package" is appended to the comment of
%% the existing package. The remaining (new) vendor packages are returned,
%% and their files are removed from the `hasFiles` of every package of the
%% document, since they only belong to the vendor libraries.
-spec remove_duplicate_packages(VendorPackages :: [map()], Spdx2 :: map()) ->
          {ResultVendorPackages :: [map()], SPDX :: map()}.
remove_duplicate_packages(VendorPackages, #{~"packages" := Packages}=Spdx) ->
    #{~"vendor" := Vendors, ~"app" := Apps} =
        lists:foldl(fun (#{~"SPDXID" := VendorId}=Vendor, #{~"vendor" := Vcc, ~"app" := Apc}=Acc) ->
                            case lists:search(fun (#{~"SPDXID" := Id}) -> VendorId == Id end, Packages) of
                                {value, P} ->
                                    %% already in the document: mark it instead of adding it again
                                    Packages1 = Apc -- [P],
                                    Comment = maps:get(~"comment", P, ~""),
                                    Acc#{~"app" := [P#{~"comment" => <<Comment/binary, " vendor package">>} | Packages1]};
                                _ ->
                                    Acc#{~"vendor" := [Vendor | Vcc]}
                            end
                    end, #{~"vendor" => [], ~"app" => Packages}, VendorPackages),

    %% files of the new vendor packages must not be listed by any other package
    VendorFileIds = lists:flatmap(fun (#{~"hasFiles" := Fs}) -> Fs end, Vendors),
    FixedApps = lists:map(fun (#{~"hasFiles" := SPDXIDs}=AppPackage) ->
                                  AppPackage#{~"hasFiles" := SPDXIDs -- VendorFileIds}
                          end, Apps),
    {Vendors, Spdx#{~"packages" := FixedApps}}.

%% The project package lists every file of the document in `hasFiles`.
%% This removes from it the files that belong to AppPackages and moves the
%% project package to the front of the package list.
%% The relation between those packages and the project package already exists.
remove_package_files_from_project(#{~"documentDescribes" := [ProjectPackageId],
                                    ~"packages" := Packages}=Spdx, AppPackages) ->
    IsProject = fun (#{~"SPDXID" := SPDXID}) -> SPDXID == ProjectPackageId end,
    {[#{~"hasFiles" := ProjectFiles}=Project], Others} = lists:partition(IsProject, Packages),
    ClaimedFiles = lists:flatmap(fun (#{~"hasFiles" := Files}) -> Files end, AppPackages),
    Spdx#{~"packages" := [Project#{~"hasFiles" := ProjectFiles -- ClaimedFiles} | Others]}.


%% Converts a #spdx_package{} record into the map that is written to the SPDX
%% document. `externalRefs` holds the purl of the package, or is empty when
%% the record has `false` as purl.
-spec create_spdx_package(Package :: spdx_package()) -> map().
create_spdx_package(Pkg) ->
    VerificationCode = maps:get('packageVerificationCodeValue',
                                Pkg#spdx_package.'packageVerificationCode'),
    ExternalRefs = case Pkg#spdx_package.'purl' of
                       false -> [];
                       Purl -> [Purl]
                   end,
    #{ ~"SPDXID" => Pkg#spdx_package.'SPDXID',
       ~"versionInfo" => Pkg#spdx_package.'versionInfo',
       ~"name" => Pkg#spdx_package.'name',
       ~"copyrightText" => Pkg#spdx_package.'copyrightText',
       ~"filesAnalyzed" => Pkg#spdx_package.'filesAnalyzed',
       ~"hasFiles" => Pkg#spdx_package.'hasFiles',
       ~"homepage" => Pkg#spdx_package.'homepage',
       ~"licenseConcluded" => Pkg#spdx_package.'licenseConcluded',
       ~"licenseDeclared" => Pkg#spdx_package.'licenseDeclared',
       ~"licenseInfoFromFiles" => Pkg#spdx_package.'licenseInfoFromFiles',
       ~"downloadLocation" => Pkg#spdx_package.'downloadLocation',
       ~"externalRefs" => ExternalRefs,
       ~"packageVerificationCode" => #{~"packageVerificationCodeValue" => VerificationCode},
       ~"supplier" => Pkg#spdx_package.'supplier'
     }.

%% Sets the `relationships` of Spdx to the PACKAGE_OF, TEST_OF or
%% DOCUMENTATION_OF relations declared by each package record.
%% Example:
%% https://github.com/spdx/tools-java/blob/master/testResources/SPDXJSONExample-v2.2.spdx.json#L240-L275
create_package_relationships(Packages, Spdx) ->
    AddRelations =
        fun (Pkg, Acc) ->
                {Kind, Pairs} = case Pkg#spdx_package.'relationships' of
                                    #{'PACKAGE_OF' := L } -> {'PACKAGE_OF', L};
                                    #{'TEST_OF' := L} -> {'TEST_OF', L};
                                    #{'DOCUMENTATION_OF' := L} -> {'DOCUMENTATION_OF', L}
                                end,
                Relations = [create_spdx_relation(Kind, ElementId, RelatedElement)
                             || {ElementId, RelatedElement} <- [check_pair(P) || P <- Pairs]],
                lists:reverse(Relations, Acc)
        end,
    Spdx#{~"relationships" => lists:foldl(AddRelations, [], Packages)}.

%% Keeps the crash of the relation fold on anything that is not a 2-tuple.
check_pair({_ElementId, _RelatedElement}=Pair) -> Pair.

%% Adds a DEPENDS_ON relationship from every package to `erts` and to each of
%% the applications listed in its app_info, skipping a package's dependency
%% on itself. The new relationships are put in front of the existing ones.
-spec create_depends_on_relationships(PackageMappings, Spdx) -> map() when
      PackageMappings :: #{AppName => {AppPath, app_info()}},
      AppName :: binary(),
      AppPath :: binary(),
      Spdx :: map().
create_depends_on_relationships(PackageTemplates, #{~"relationships" := Existing}=Spdx) ->
    CollectDependencies =
        fun (PackageName, {_Path, AppInfo}, Acc) ->
                Dependencies = [~"erts" | [erlang:atom_to_binary(App)
                                           || App <- AppInfo#app_info.applications]],
                PackageId = generate_spdxid_name(PackageName),
                [create_spdx_relation('DEPENDS_ON', PackageId, generate_spdxid_name(Dependency))
                 || Dependency <- Dependencies,
                    generate_spdxid_name(Dependency) =/= PackageId] ++ Acc
        end,
    Spdx#{~"relationships" := maps:fold(CollectDependencies, [], PackageTemplates) ++ Existing}.

%% Adds an OPTIONAL_DEPENDENCY_OF relationship from each included or optional
%% application of a package to that package, skipping a package's dependency
%% on itself. The new relationships are put in front of the existing ones.
create_opt_depency_relationships(PackageTemplates, #{~"relationships" := Existing}=Spdx) ->
    CollectOptional =
        fun (PackageName, {_Path, AppInfo}, Acc) ->
                OptionalApps = AppInfo#app_info.included_applications
                    ++ AppInfo#app_info.optional_applications,
                Dependencies = [erlang:atom_to_binary(App) || App <- OptionalApps],
                PackageId = generate_spdxid_name(PackageName),
                [create_spdx_relation('OPTIONAL_DEPENDENCY_OF', generate_spdxid_name(Dependency), PackageId)
                 || Dependency <- Dependencies,
                    generate_spdxid_name(Dependency) =/= PackageId] ++ Acc
        end,
    Spdx#{~"relationships" := maps:fold(CollectOptional, [], PackageTemplates) ++ Existing}.


%% Adds a PACKAGE_OF relationship from every vendor package to the OTP
%% package that contains it. Example: asmjit is a subpackage of erts.
%%
%% The owning package is derived from the vendor SPDXID, passed through
%% undo_spdxid_name/1 and split on "-":
%%   App-test-...          belongs to  App-test
%%   App-documentation-... belongs to  App-documentation
%%   App-...               belongs to  App
%% When no package of the document has that name, the vendor package is
%% attached to the top-level project package.
create_vendor_relations(NewVendorPackages, #{~"packages" := Packages, ~"relationships" := Relations}=SpdxWithVendor) ->
    VendorRelations =
        lists:map(fun (#{~"name" := _Name, ~"SPDXID" := ID}=_Vendor) ->
                          %% Get root relation to point to
                          App = case string:split(undo_spdxid_name(ID), ~"-", all) of
                                    [BaseApp, ~"test" | _] ->
                                        <<BaseApp/binary, "-test">>;
                                    %% the split consumes every "-", so the segment to
                                    %% look for is "documentation", not "-documentation"
                                    [BaseApp, ~"documentation" | _] ->
                                        <<BaseApp/binary, "-documentation">>;
                                    [BaseApp | _] ->
                                        BaseApp
                                end,
                          Pkgs = lists:filter(fun (#{~"name" := N}) -> App == generate_spdx_valid_name(N) end, Packages),
                          case Pkgs of
                              [#{~"SPDXID" := RootId}=_RootPackage] ->
                                  create_spdx_relation('PACKAGE_OF', ID, RootId);
                              [] ->
                                  %% Attach to root level package
                                  create_spdx_relation('PACKAGE_OF', ID, ?spdxref_project_name)
                              end
                  end, NewVendorPackages),
    SpdxWithVendor#{~"relationships" := Relations ++ VendorRelations}.

%% Builds one SPDX relationship entry: ElementId <Relation> RelatedElement.
%% The spec lists every relationship type this file creates.
-spec create_spdx_relation(Relation, SpdxId :: binary(), RelatedId :: binary()) -> map() when
      Relation :: 'PACKAGE_OF' | 'DEPENDS_ON' | 'TEST_OF'
                | 'DOCUMENTATION_OF' | 'OPTIONAL_DEPENDENCY_OF'.
create_spdx_relation(Relation, ElementId, RelatedElement) ->
    #{~"spdxElementId" => ElementId,
      ~"relatedSpdxElement" => RelatedElement,
      ~"relationshipType" => Relation}.

%% Returns the paths (relative to Folder, as binaries) of the *.app.src files
%% found under Folder. Paths containing "test" or "smoke-build" are left out,
%% except for the ones of common_test that are not in a "test_dir".
%%
%% The output of the shell commands is split on newlines, so the result also
%% contains empty binaries (e.g. for the trailing newline).
-spec find_app_src_files(Folder :: string()) -> [binary()].
find_app_src_files(Folder) ->
    S = os:cmd("find "++ Folder ++ " -regex .*.app.src | grep -v test | grep -v smoke-build | cut -d/ -f2-"),

    %% TODO: merge above and below command into a single command
    %% specific line to include common_test, if it were to exist
    SCommonTest = os:cmd("find " ++ Folder ++ " -regex .*.app.src | grep -v test_dir | grep common_test | cut -d/ -f2-"),
    lists:map(fun erlang:list_to_binary/1, string:split(S ++ SCommonTest, "\n", all)).

%% Returns [{AppName, AppVersion}] (binaries) for the applications listed in
%% the line of "otp_versions.table" whose first word is "OTP-" followed by
%% the trimmed contents of "OTP_VERSION". Both files are read from the
%% current directory. Returns [] when no line matches.
%%
%% The "#" and ":" separators and the version tag itself are stripped from
%% the line before it is split into "name-version" words. The replacements
%% return binaries so that the words, and therefore the returned names and
%% versions, are plain binaries rather than nested chardata.
get_otp_apps_from_table() ->
    {ok, BinTable} = file:read_file("otp_versions.table"),
    {ok, ReleaseBin0} = file:read_file("OTP_VERSION"),
    ReleaseBin = string:trim(ReleaseBin0),
    OTPVersion = <<"OTP-", ReleaseBin/binary>>,

    Lines = string:split(BinTable, "\n", all),
    LineOTP = lists:filter(fun (Line) ->
                                   [V | _Rest] = string:split(Line, " "),
                                   V == OTPVersion
                           end, Lines),
    case LineOTP of
        [Line] ->
            Line1 = re:replace(Line, "(#|:)", "", [global, {return, binary}]),
            Line2 = string:trim(re:replace(Line1, OTPVersion, "", [global, {return, binary}])),

            %% consecutive spaces produce empty words, which are dropped
            AppsWithVersion = [App || App <- string:split(Line2, " ", all), App =/= <<>>],
            lists:map(fun (App) ->
                              [Name, Version] = string:split(App, "-"),
                              {Name, Version}
                      end, AppsWithVersion);
        [] ->
            [];
        _ ->
            io:format("ERROR, there cannot be multiple lines matching")
    end.

%% Returns the paths of the vendor.info files found under Folder,
%% one per line of the output of `find`.
find_vendor_src_files(Folder) ->
    Output = os:cmd("find "++ Folder ++ " -name vendor.info"),
    string:split(string:trim(Output), "\n", all).

%% Merges the package locations detected for each *.app.src path into a
%% single map. When two paths yield the same application name, the one
%% that comes later in the list wins.
-spec generate_spdx_mappings(Path :: [binary()]) -> Result when
      Result :: #{AppName :: binary() => {AppPath :: binary(), AppInfo :: app_info()}}.
generate_spdx_mappings(AppSrcPath) ->
    Detected = [build_package_location(Path) || Path <- AppSrcPath],
    lists:foldl(fun (Mapping, Merged) -> maps:merge(Merged, Mapping) end, #{}, Detected).

%% Reads every vendor.info file in VendorSrcPath and returns the
%% concatenation of their decoded contents. Each file is expected to decode
%% to a list of packages (JSON objects, i.e. maps) following the vendor.info
%% specification.
-spec generate_vendor_info_package(VendorSrcPath :: [file:name()]) -> [map()].
generate_vendor_info_package(VendorSrcPath) ->
    lists:flatmap(fun decode_without_spdx_license/1, VendorSrcPath).

%% Creates one SPDX package (a map) per vendor.info entry.
%%
%% The `path` of an entry is either a non-empty list of explicit files, or a
%% single directory (which must exist) whose files all belong to the package.
%% Files matched by the `exclude` list of the entry are left out.
-spec generate_spdx_vendor_packages(VendorInfoPackages :: [map()], Spdx :: map()) -> [map()].
generate_spdx_vendor_packages(VendorInfoPackages, #{~"files" := SpdxFiles}=_SPDX) ->
    lists:map(fun
                  (#{~"ID" := _, ~"path" := [_ | _]=ExplicitFiles}=Package) ->
                      %% Deals with the cases of creating a package out of specific files
                      Paths = lists:map(fun cleanup_path/1, ExplicitFiles),
                      InPackage = fun (Filename) -> lists:member(Filename, Paths) end,
                      vendor_package_from_spdx_files(Package, InPackage, SpdxFiles);
                  (#{~"ID" := _, ~"path" := DirtyPath}=Package) when is_binary(DirtyPath) ->
                      %% Deals with the case of creating a package out of a path
                      Path = ensure_trailing_slash(cleanup_path(DirtyPath)),
                      true = filelib:is_dir(DirtyPath),
                      InPackage = fun (Filename) -> string:prefix(Filename, Path) =/= nomatch end,
                      vendor_package_from_spdx_files(Package, InPackage, SpdxFiles)
              end, VendorInfoPackages).

%% Builds the SPDX package of one vendor.info entry out of the SPDX files
%% accepted by InPackage and not excluded by the entry.
%% The vendor.info specific fields are dropped from the result; the purl is
%% kept as an external reference.
-spec vendor_package_from_spdx_files(Package :: map(),
                                     InPackage :: fun((Filename :: binary()) -> boolean()),
                                     SpdxFiles :: [map()]) -> map().
vendor_package_from_spdx_files(#{~"ID" := Id}=Package, InPackage, SpdxFiles) ->
    RemoveVendorInfoFields = [~"purl", ~"ID", ~"path", ~"update", ~"exclude", ~"sha"],
    Package1 = maps:without(RemoveVendorInfoFields, Package),
    Excludes = get_vendor_excludes(Package),

    %% place files in SPDX in the corresponding package
    Files = lists:filter(fun (#{~"fileName" := Filename}) ->
                                 InPackage(Filename) andalso
                                     not exclude_vendor_file(Filename, Excludes)
                         end, SpdxFiles),

    LicenseInfoInFiles = split_licenses_in_individual_parts(
                           lists:foldl(fun(#{~"licenseInfoInFiles" := Licenses}, Acc) ->
                                               Licenses ++ Acc
                                       end, [], Files)),

    PackageVerificationCodeValue = generate_verification_code_value(Files),
    ExternalRefs = generate_vendor_purl(Package),
    Package1#{
              ~"SPDXID" => generate_spdxid_name(Id),
              ~"filesAnalyzed" => true,
              ~"hasFiles" => lists:map(fun (#{~"SPDXID":=Id0}) -> Id0 end, Files),
              ~"licenseConcluded" => ~"NOASSERTION",
              ~"licenseInfoFromFiles" => lists:uniq(LicenseInfoInFiles),
              ~"packageVerificationCode" => #{~"packageVerificationCodeValue" => PackageVerificationCodeValue},
              ~"comment" => ~"vendor package",
              ~"externalRefs" => ExternalRefs
             }.

%% Classifies every entry of the optional `exclude` list of a vendor package
%% as {dir, Path} (with a trailing slash) or {file, Path}.
%% Crashes when an entry is neither an existing directory nor a regular file.
get_vendor_excludes(Package) ->
    Classify =
        fun (Exclude) ->
                Clean = cleanup_path(Exclude),
                IsDir = filelib:is_dir(Exclude),
                if
                    IsDir ->
                        {dir, ensure_trailing_slash(Clean)};
                    true ->
                        true = filelib:is_regular(Exclude),
                        {file, Clean}
                end
        end,
    lists:map(Classify, maps:get(~"exclude", Package, [])).

%% Tells whether Filename is excluded: it is equal to an excluded file, or
%% it starts with the path of an excluded directory.
exclude_vendor_file(Filename, Excludes) ->
    IsExcludedBy = fun ({dir, ExcludeDir}) ->
                           string:prefix(Filename, ExcludeDir) =/= nomatch;
                       ({file, ExcludeFile}) ->
                           string:equal(Filename, ExcludeFile)
                   end,
    lists:any(IsExcludedBy, Excludes).

%% Returns Path, as an iolist, ending in exactly one "/".
ensure_trailing_slash(Path) ->
    WithoutSlashes = string:trim(Path, trailing, "/"),
    [WithoutSlashes, $/].

%% Returns the external references of a vendor package: none when the
%% package has no `purl`, otherwise a single purl reference. The purl gets
%% "@" and the `versionInfo` appended when the package has a version.
generate_vendor_purl(Package) ->
    Description = maps:get(~"description", Package, ""),
    case maps:get(~"purl", Package, false) of
        false ->
            [];
        Purl ->
            Locator = case maps:get(~"versionInfo", Package, false) of
                          false -> Purl;
                          Vsn -> <<Purl/binary, "@", Vsn/binary>>
                      end,
            [create_externalRef_purl(Description, Locator)]
    end.

%% Removes a leading "./" from a binary path, if there is one.
cleanup_path(Path) when is_binary(Path) ->
    case Path of
        <<"./", Relative/binary>> -> Relative;
        _ -> Path
    end.

%% Maps the path of an *.app.src file to #{AppName => {AppPath, AppInfo}}.
%% Paths under "lib/<App>" get the keys of the loaded application <App>;
%% paths under "erts" get a hand-written app_info. The empty path yields
%% an empty map.
build_package_location(<<>>) ->
    #{};
build_package_location(AppSrcPath) ->
    case string:split(AppSrcPath, "/", all) of
        [~"erts"=Erts | _] ->
            ErtsInfo = #app_info{ description = ~"Erlang Runtime System",
                                  id           = [],
                                  vsn          = erlang:list_to_binary(erlang:system_info(version)),
                                  modules      = not_loaded,
                                  applications = [],
                                  included_applications = [],
                                  optional_applications = [] },
            #{Erts => {Erts, ErtsInfo}};
        [~"lib", App | _] ->
            %% somewhat unsafe binary_to_atom/1 but we have guarantees to receive
            %% only apps in Erlang/OTP
            AppName = erlang:binary_to_atom(App),
            LoadResult = application:load(AppName),
            Loaded = LoadResult == ok
                orelse LoadResult == {error, {already_loaded, AppName}},
            case Loaded of
                true ->
                    {ok, AppKey} = application:get_all_key(AppName),
                    #{App => {<<"lib/", App/binary>>, app_key_to_record(AppKey)}};
                false ->
                    %% useful only for debugging.
                    %% this script should have all dependencies and never end up here.
                    io:format("[Error] ~p~n", [{AppSrcPath, LoadResult, AppName, App}]),
                    error(?FUNCTION_NAME)
            end
    end.

%% Converts the property list returned by application:get_all_key/1 into an
%% #app_info{} record. Description, id and vsn are converted to binaries.
%%
%% The keys are looked up by name, so the conversion does not depend on the
%% order or on the exact number of entries of the list. It still crashes
%% (badmatch) when one of the keys used below is missing.
app_key_to_record(AppKey) ->
    #{description := Description,
      id := Id,
      vsn := Vsn,
      modules := Modules,
      applications := Apps,
      included_applications := Included,
      optional_applications := Optional} = maps:from_list(AppKey),
    #app_info{ description  = erlang:list_to_binary(Description),
               id           = erlang:list_to_binary(Id),
               vsn          = erlang:list_to_binary(Vsn),
               modules      = Modules,
               applications = Apps,
               included_applications = Included,
               optional_applications = Optional }.


%% Builds the SPDX packages for every application in PackageMappings.
%%
%% Each application contributes three packages: the application itself,
%% `<name>-documentation` and `<name>-test`. Files under the application
%% path that are neither tests nor documentation go to the application
%% package. The system documentation package is always part of the result.
-spec generate_spdx_packages(PackageMappings, Spdx) -> [spdx_package()] when
      PackageMappings :: #{AppName => {AppPath, app_info()}},
      AppName         :: unicode:chardata(),
      AppPath         :: unicode:chardata(),
      Spdx            :: map().
generate_spdx_packages(PackageMappings, #{~"files" := Files,
                                          ~"documentDescribes" := [ProjectName]}) ->
    SystemDocs = generate_spdx_system_docs(Files, ProjectName),
    AddAppPackages =
        fun (AppName, {AppPath, AppInfo}, Acc) ->
                AppFiles  = group_files_by_app(Files, AppPath),
                TestFiles = get_test_files(AppName, AppFiles, AppPath),
                DocFiles  = get_doc_files(AppName, AppFiles, AppPath),
                SourceFiles = (AppFiles -- TestFiles) -- DocFiles,

                %% The same license expression is used as both the
                %% concluded and the declared license of all three packages.
                License = otp_app_license_mapping(AppName),
                Vsn = AppInfo#app_info.vsn,
                NewPackage =
                    fun (Name, Description, PackageFiles, WithPurl) ->
                            create_spdx_package_record(Name, Vsn, Description,
                                                       PackageFiles, ?spdx_homepage,
                                                       License, License, WithPurl)
                    end,

                %% Only the application package itself carries a purl.
                App = NewPackage(AppName, AppInfo#app_info.description,
                                 SourceFiles, true),
                Docs = NewPackage(<<AppName/binary, "-documentation">>,
                                  <<"Documentation of ", AppName/binary>>,
                                  DocFiles, false),
                Tests = NewPackage(<<AppName/binary, "-test">>,
                                   <<"Tests of ", AppName/binary>>,
                                   TestFiles, false),

                AppId = App#spdx_package.'SPDXID',
                [App#spdx_package{
                   'relationships' = #{ 'PACKAGE_OF' => [{ AppId, ProjectName }] } },
                 Docs#spdx_package{
                   'relationships' = #{ 'DOCUMENTATION_OF' =>
                                            [{ Docs#spdx_package.'SPDXID', AppId }] } },
                 Tests#spdx_package{
                   'relationships' = #{ 'TEST_OF' =>
                                            [{ Tests#spdx_package.'SPDXID', AppId }] } }
                 | Acc]
        end,
    maps:fold(AddAppPackages, [SystemDocs], PackageMappings).

%% Builds the `system-documentation` package from the documentation files
%% found under the `system` prefix.
%%
%% Its license expression is the " AND "-joined list of licenses found in
%% those files, after remove_invalid_spdx_licenses/1 has filtered it. The
%% package is linked to ParentSPDXPackageId through DOCUMENTATION_OF.
generate_spdx_system_docs(Files, ParentSPDXPackageId) ->
    Name = ~"system",
    SystemFiles = group_files_by_app(Files, Name),
    DocFiles = get_doc_files(Name, SystemFiles, Name),
    FoundLicenses = generate_license_info_from_files(DocFiles),
    License = binary:join(remove_invalid_spdx_licenses(FoundLicenses), ~" AND "),
    Docs = create_spdx_package_record(<<Name/binary, "-documentation">>,
                                      get_otp_version(),
                                      <<"System Documentation">>,
                                      DocFiles, ?spdx_homepage,
                                      License, License, false),
    DocsId = Docs#spdx_package.'SPDXID',
    Docs#spdx_package {
      'relationships' = #{ 'DOCUMENTATION_OF' => [{ DocsId, ParentSPDXPackageId }] } }.

%% Erlang/OTP apps follow the convention of keeping their 'test' and 'doc'
%% folders at the top level of the app folder. erts is special: its folders
%% can appear at several levels, so a recursive wildcard is used for it.
%%
%% Returns the entries of SpdxPackageFiles located in a test folder below
%% PrefixPath.
get_test_files(App, SpdxPackageFiles, PrefixPath) ->
    Glob = case App of
               ~"erts" -> "/**/test/**";
               _ -> "/test/**"
           end,
    group_files_by_folder(SpdxPackageFiles, binary_to_list(PrefixPath) ++ Glob).

%% Returns the entries of SpdxPackageFiles located in a doc folder below
%% PrefixPath. For erts the doc folder is searched at any depth; for every
%% other app only the top-level 'doc' folder is considered.
get_doc_files(App, SpdxPackageFiles, PrefixPath) ->
    Glob = case App of
               ~"erts" -> "/**/doc/**";
               _ -> "/doc/**"
           end,
    group_files_by_folder(SpdxPackageFiles, binary_to_list(PrefixPath) ++ Glob).

%% Assembles an #spdx_package{} record for PackageName.
%%
%% Copyright text, file list, licenses found in files and the verification
%% code are all derived from SpdxPackageFiles. Purl is a boolean: when true
%% an external purl reference is created, otherwise the field is `false`.
%% The record is returned with an empty relationships map.
create_spdx_package_record(PackageName, Vsn, Description, SpdxPackageFiles,
                           Homepage, LicenseConcluded, LicenseDeclared, Purl) ->
    SpdxId = generate_spdxid_name(PackageName),
    VerificationCode = generate_verification_code_value(SpdxPackageFiles),
    ExternalRef =
        case Purl of
            true -> create_externalRef_purl(Description, otp_purl(PackageName, Vsn));
            false -> false
        end,
    #spdx_package {
       'SPDXID' = SpdxId,
       'versionInfo' = Vsn,
       'description' = Description,
       'name' = PackageName,
       'copyrightText' = generate_copyright_text(SpdxPackageFiles),
       'filesAnalyzed' = true,

       %% O(n2) complexity... fix if necessary
       'hasFiles' = generate_has_files(SpdxPackageFiles),

       'purl' = ExternalRef,
       'homepage' = Homepage,
       'licenseConcluded' = LicenseConcluded,
       'licenseDeclared'  = LicenseDeclared,
       'licenseInfoFromFiles' = generate_license_info_from_files(SpdxPackageFiles),
       'packageVerificationCode' = #{ 'packageVerificationCodeValue' => VerificationCode },
       'relationships' = #{}
      }.


%% Returns the license expression of an OTP application.
%% A few applications have their own expression; every other name maps to
%% ?erlang_license.
otp_app_license_mapping(~"edoc") -> ~"Apache-2.0 OR LGPL-2.1-or-later";
otp_app_license_mapping(~"syntax_tools") -> ~"Apache-2.0 OR LGPL-2.1-or-later";
otp_app_license_mapping(~"eunit") -> ~"Apache-2.0 OR LGPL-2.1-or-later";
otp_app_license_mapping(~"eldap") -> ~"MIT";
otp_app_license_mapping(_Name) -> ?erlang_license.


%% Returns the SPDX identifier of a package: the name, cleaned up by
%% generate_spdx_valid_name/1, prefixed with "SPDXRef-otp-".
generate_spdxid_name(PackageName) ->
    <<"SPDXRef-otp-", (generate_spdx_valid_name(PackageName))/binary>>.

%% Strips the "SPDXRef-otp-" prefix from an SPDX identifier and returns the
%% remainder. Fails with badmatch when Name does not start with the prefix.
undo_spdxid_name(Name) ->
    Prefix = ~"SPDXRef-otp-",
    PrefixSize = byte_size(Prefix),
    <<Prefix:PrefixSize/binary, Remainder/binary>> = Name,
    Remainder.

%% Removes every underscore from PackageName and returns the result as a
%% binary.
generate_spdx_valid_name(PackageName) ->
    %% Splitting on "_" and gluing the pieces back together drops the
    %% underscores.
    iolist_to_binary(string:split(PackageName, ~"_", all)).

%% Collects the individual license identifiers referenced by the
%% "licenseInfoInFiles" field of every file, without duplicates.
%% Licenses of later files come first in the result.
generate_license_info_from_files(SpdxPackageFiles) ->
    LicensesOf = fun (#{~"licenseInfoInFiles" := LicenseInfoInFiles}) ->
                         split_licenses_in_individual_parts(LicenseInfoInFiles)
                 end,
    PerFile = lists:map(LicensesOf, SpdxPackageFiles),
    lists:uniq(lists:append(lists:reverse(PerFile))).

%% Breaks SPDX license expressions into their individual license
%% identifiers.
%%
%% Parentheses (and any literal `|`, which is part of the character class)
%% are stripped, then each expression is split on every " OR " and every
%% " AND ". The identifiers are trimmed and returned without duplicates.
%%
%% string:split/2 only splits at the first separator, so an expression with
%% three or more operands (e.g. "A OR B OR C") used to leave a compound
%% fragment such as "B OR C" in the result. Passing `all` splits at every
%% occurrence.
split_licenses_in_individual_parts(Licenses) ->
    lists:foldl(fun (License, Acc) ->
                        L = re:replace(License, "[\(|\)]", "", [global, {return, list}]),
                        Licenses0 = string:split(list_to_binary(L), ~" OR ", all),
                        Licenses1 = lists:foldl(fun (L1, Acc1) ->
                                                        string:split(L1, ~" AND ", all) ++ Acc1
                                                end, [], Licenses0),
                        lists:uniq(lists:map(fun string:trim/1, Licenses1 ++ Acc))
                end, [], Licenses).

%% Returns the "SPDXID" of every file, in the order the files were given.
%% Fails with function_clause when a file has no "SPDXID" key.
generate_has_files(SpdxPackageFiles) ->
    IdOf = fun (#{~"SPDXID" := FileId}) -> FileId end,
    lists:map(IdOf, SpdxPackageFiles).

%% alg. described in https://spdx.github.io/spdx-spec/v2.2.2/package-information/#791-description
%%
%% Takes the SHA1 checksum value of every file, sorts the values,
%% concatenates them and returns the SHA1 of that concatenation as a
%% lowercase hex binary. Every file must carry exactly one checksum entry
%% and its algorithm must be "SHA1".
-spec generate_verification_code_value(SpdxPackageFiles :: [SPDXFile :: map()]) -> binary().
generate_verification_code_value(SpdxPackageFiles) ->
    Sha1Of = fun (#{~"checksums" := [#{~"algorithm" := ~"SHA1",
                                       ~"checksumValue" := Value}]}) ->
                     Value
             end,
    Sorted = lists:sort(lists:map(Sha1Of, SpdxPackageFiles)),
    Concatenated = << <<Value/binary>> || Value <- Sorted >>,

    %% crypto:hash/2 returns raw bytes; SPDX expects lowercase hex.
    binary:encode_hex(crypto:hash(sha, Concatenated), lowercase).

%% Returns the distinct "copyrightText" values of the files as one binary,
%% each value followed by a newline.
%%
%% The result is the same as before: a text that occurs several times is
%% emitted once, at the position of its last occurrence. Duplicates are now
%% removed in a single lists:uniq/1 pass, where the previous version ran it
%% over the whole accumulator for every file, and the output is built by
%% appending, where the previous version prepended to a growing binary.
generate_copyright_text(SpdxPackageFiles) ->
    Texts = lists:map(fun (#{~"copyrightText" := Text}) -> Text end, SpdxPackageFiles),
    %% Running uniq on the reversed list keeps the last occurrence of each
    %% text; reversing again restores file order.
    LastOccurrences = lists:reverse(lists:uniq(lists:reverse(Texts))),
    << <<Text/binary, "\n">> || Text <- LastOccurrences >>.

%% Keeps the files whose "fileName" starts with PrefixPath.
%% Fails with function_clause when a file has no "fileName" key.
group_files_by_app(Files, PrefixPath) ->
    StartsWithPrefix =
        fun (#{~"fileName" := Filename}) ->
                string:prefix(Filename, PrefixPath) =/= nomatch
        end,
    lists:filter(StartsWithPrefix, Files).

%% Keeps the files whose "fileName" is one of the paths matched on disk by
%% Wildcard (a filelib:wildcard/1 pattern, resolved against the current
%% working directory).
%%
%% The matched paths are stored as map keys, so each file costs one lookup.
%% The previous version scanned the whole wildcard result with
%% lists:member/2 for every file. The selected files are the same.
group_files_by_folder(Files, Wildcard) ->
    MatchedPaths = [unicode:characters_to_binary(Path)
                    || Path <- filelib:wildcard(Wildcard)],
    InFolder = maps:from_keys(MatchedPaths, true),
    lists:filter(fun (#{~"fileName" := Filename}) ->
                         is_map_key(Filename, InFolder)
                 end, Files).

%% Decodes the SBOM file and runs the built-in integrity checks on it,
%% followed by the NTIA checker step (which is a no-op unless enabled).
test_file(#{sbom_file := SbomFile, ntia_checker := Verification}) ->
    ok = test_generator(decode(SbomFile)),
    ok = test_ntia_checker(Verification, SbomFile).

%% Runs the NTIA compliance checker on SbomFile when the first argument is
%% `true`; does nothing when it is `false`.
%%
%% The executable looked up in PATH is the one that is actually invoked.
%% The previous version probed for "ntia-checker" but ran "sbomcheck", so
%% the availability check could disagree with the command.
test_ntia_checker(false, _SbomFile) -> ok;
test_ntia_checker(true, SbomFile) ->
    Tool = "sbomcheck",
    have_tool(Tool),
    Cmd = Tool ++ " --comply ntia --file " ++ SbomFile,
    io:format("~nRunning: NTIA Compliance Checker~n[~ts]~n", [Cmd]),
    _ = cmd(Cmd),
    io:format("OK~n"),
    ok.

%% Runs Cmd through os:cmd/2 with `exception_on_failure` enabled and
%% returns its output with surrounding whitespace removed.
cmd(Cmd) ->
    Command = unicode:characters_to_list(Cmd),
    Output = os:cmd(Command, #{ exception_on_failure => true }),
    string:trim(Output).

%% Returns ok when Tool is found in PATH; otherwise reports the missing
%% tool through fail/2.
have_tool(Tool) ->
    Found = os:find_executable(Tool),
    if
        Found =:= false -> fail("Could not find '~ts' in PATH", [Tool]);
        true -> ok
    end.

%% Writes the formatted message plus a newline to standard_error, then
%% halts the runtime with exit status 1.
fail(Fmt, Args) ->
    Message = io_lib:format(Fmt ++ "\n", Args),
    io:put_chars(standard_error, Message),
    erlang:halt(1).

%% Runs the project-level checks and then the package-level checks on the
%% decoded SBOM.
test_generator(Sbom) ->
    io:format("~nRunning: verification of OTP SBOM integrity~n"),
    ok = project_generator(Sbom),
    ok = package_generator(Sbom).

%% Calls every function named in Tests (exported from this module, arity 1)
%% with Sbom, printing one "- name.....result" line per test under a
%% header naming the calling function. Evaluates to `true` only when every
%% test returned `ok`; lists:all/2 stops at the first one that did not.
%%
%% The dot padding is clamped at zero so a test name longer than 40
%% characters no longer crashes lists:duplicate/2, and the result is
%% printed with ~p so a non-`ok` return value is shown rather than
%% crashing the format call.
-define(CALL_TEST_FUNCTIONS(Tests, Sbom),
         (begin
            io:format("[~s]~n", [?FUNCTION_NAME]),
            lists:all(fun (Fun) ->
                              Module = ?MODULE,
                              Result = apply(Module, Fun, [Sbom]),
                              L = length(atom_to_list(Fun)),
                              Padding = lists:duplicate(max(0, 40 - L), "."),
                              io:format("- ~s~s~p~n", [Fun, Padding, Result]),
                              ok == Result
                      end, Tests)
        end)).

%% Runs the checks that concern the SBOM document as a whole.
project_generator(Sbom) ->
    ProjectTests =
        [test_project_name,
         test_name,
         test_creators_tooling,
         test_spdx_version],
    true = ?CALL_TEST_FUNCTIONS(ProjectTests, Sbom),
    ok.

%% Runs the checks that concern the packages listed in the SBOM.
%% Disabled checks are kept as comments next to the reason they are off.
package_generator(Sbom) ->
    PackageTests =
        [test_minimum_apps,
         test_copyright_not_empty,

         %% TODO: enable once we can curate ORT copyrights
         %% test_copyright_format,

         test_filesAnalised,
         test_hasFiles_not_empty,

         %% TODO: enable once licenseInFiles match licenseConcluded
         %% test_files_licenses,

         test_homepage,
         test_licenseConcluded_exists,
         test_licenseDeclared_exists,
         test_licenseInfoFromFiles_not_empty,
         test_package_names,
         test_package_ids,
         test_erts,
         test_verificationCode,
         test_supplier_Ericsson,
         test_originator_Ericsson,
         test_versionInfo_not_empty,
         test_package_hasFiles,
         test_project_purl,
         test_packages_purl,
         test_download_location,
         test_package_relations,
         test_has_extracted_licenses,
         test_vendor_packages],
    true = ?CALL_TEST_FUNCTIONS(PackageTests, Sbom),
    ok.

%% Checks that the single element described by the document is
%% ?spdxref_project_name.
test_project_name(#{~"documentDescribes" := [DescribedId]}) ->
    ?spdxref_project_name = DescribedId,
    ok.

%% Checks that the document name is ?spdx_project_name.
test_name(#{~"name" := DocumentName}) ->
    ?spdx_project_name = DocumentName,
    ok.

%% Checks that at least one creator entry starts with
%% ?spdx_creators_tooling.
test_creators_tooling(#{~"creationInfo" := #{~"creators" := Creators}}) ->
    IsTooling = fun (Creator) ->
                        string:prefix(Creator, ?spdx_creators_tooling) =/= nomatch
                end,
    true = lists:any(IsTooling, Creators),
    ok.

%% Checks that the document declares ?spdx_version.
test_spdx_version(#{~"spdxVersion" := DeclaredVersion}) ->
    ?spdx_version = DeclaredVersion,
    ok.

%% Checks that the project, every minimum OTP app and every root vendor
%% package appear (by name) among the SBOM packages, and that each package
%% whose SPDXID is found in get_otp_apps_from_table/0 carries the version
%% listed there.
test_minimum_apps(#{~"documentDescribes" := [ProjectName], ~"packages" := Packages}) ->
    MinimumApps = minimum_otp_apps(),
    %% NOTE(review): the apps are loaded first, presumably so that later
    %% lookups can see them; confirm against get_otp_apps_from_table/0.
    lists:foreach(fun (App) -> application:load(erlang:binary_to_atom(App)) end, MinimumApps),

    IsProject = fun (#{~"SPDXID" := Id}) -> Id == ProjectName end,
    [#{~"name" := Project}] = lists:filter(IsProject, Packages),
    ExpectedNames = [Project | MinimumApps ++ root_vendor_packages()],
    PackageNames = lists:map(fun (#{~"name" := Name}) -> Name end, Packages),

    %% every known package must be captured
    case ExpectedNames -- PackageNames of
        [] ->
            ok;
        Missing ->
            io:format("Minimum apps not captured.~n~p distinct from ~p~n",
                      [Missing, PackageNames -- ExpectedNames]),
            error(?FUNCTION_NAME)
    end,

    TableVersions = lists:map(fun ({Name, Version}) ->
                                      {generate_spdxid_name(Name), Version}
                              end, get_otp_apps_from_table()),
    VersionMatches =
        fun (#{~"SPDXID" := Id, ~"versionInfo" := Version}) ->
                case lists:keyfind(Id, 1, TableVersions) of
                    false ->
                        %% not listed in the table: nothing to compare
                        true;
                    {_, TableVersion} ->
                        io:format("Table ~p AppVersion ~p, ~p~n", [TableVersion, Version, Id]),
                        TableVersion == Version
                end
        end,
    true = lists:all(VersionMatches, Packages),
    ok.

%% Names of the OTP applications (plus erts) that the SBOM checks expect
%% to find as packages.
minimum_otp_apps() ->
    [~"kernel", ~"stdlib", ~"xmerl", ~"wx",
     ~"tools", ~"tftp", ~"syntax_tools", ~"ssl",
     ~"ssh", ~"snmp", ~"sasl", ~"runtime_tools",
     ~"reltool", ~"public_key", ~"parsetools", ~"os_mon",
     ~"observer", ~"mnesia", ~"megaco", ~"jinterface",
     ~"inets", ~"ftp", ~"eunit", ~"et",
     ~"erl_interface", ~"eldap", ~"edoc", ~"diameter",
     ~"dialyzer", ~"debugger", ~"crypto", ~"compiler",
     ~"common_test", ~"erts", ~"asn1", ~"odbc"].

%% Names of the vendor packages that are checked together with the minimum
%% apps.
root_vendor_packages() ->
    [~"asmjit",
     ~"pcre2",
     ~"zlib",
     ~"ryu",
     ~"zstd"].

%% Names of all vendor packages the SBOM must contain: the root vendor
%% packages followed by the ones listed here.
minimum_vendor_packages() ->
    %% self-contained
    OtherVendors = [~"tcl", ~"STL", ~"json-test-suite", ~"openssl",
                    ~"Autoconf", ~"wx", ~"jquery", ~"jquery-tablesorter"],
    root_vendor_packages() ++ OtherVendors.

%% Checks that no package has an empty "copyrightText".
test_copyright_not_empty(#{~"packages" := Packages}) ->
    HasCopyright = fun (#{~"copyrightText" := Text}) -> Text =/= ~"" end,
    true = lists:all(HasCopyright, Packages),
    ok.

%% test_copyright_format(#{~"packages" := Packages, ~"files" := Files}) ->
%%     EricssonRegex = ~S"^Copyright Ericsson AB ((?:19|20)[0-9]{2}-)?((?:19|20)[0-9]{2}).*$",
%%     ContributorRegex = ~S"^Copyright([\s]?\([cC©]\))? ((?:19|20)[0-9]{2}-)?((?:19|20)[0-9]{2}) ((\w|\s|-)*)<(\w|\.|-)+@(\w|\.|-)+>$",
%%     VendorRegex = ~S"^Copyright([\s]?\([cC©]\))? ((?:19|20)[0-9]{2}-)?((?:19|20)[0-9]{2})?((\w|\s|-|,|\.)*)$",
%%     Default = ~S"^Copyright[\s]?(\([cC©]\))? ((?:19|20)[0-9]{2}-)?((?:19|20)[0-9]{2}) Erlang/OTP and its contributors$",
%%     NoAssertionRegex = "^NOASSERTION|NONE",
%%     Regexes = [EricssonRegex, ContributorRegex, VendorRegex, NoAssertionRegex, Default],

%%     Regex = lists:concat(lists:join(~S"|", Regexes)),
%%     {ok, CopyrightRegex} = re:compile([Regex]),
%%     true = lists:all(fun (#{~"copyrightText" := CopyrightText, ~"fileName" := Filename}) ->
%%                              Copyrights = string:split(CopyrightText, "\n", all),
%%                              lists:all(fun (C) ->
%%                                                case re:run(C, CopyrightRegex) of
%%                                                    nomatch ->
%%                                                        throw({warn, "Invalid Copyright: '~ts' in '~ts for ~ts~n'", [C, Filename, Regex]});
%%                                                    _ ->
%%                                                        true
%%                                                end
%%                                        end, Copyrights)
%%                      end, Files),

%%     true = lists:all(fun (#{~"copyrightText" := CopyrightText}) ->
%%                              Copyrights = string:split(CopyrightText, "\n", all),
%%                              lists:all(fun (C) ->
%%                                                case re:run(C, CopyrightRegex) of
%%                                                    nomatch ->
%%                                                        throw({warn, "Invalid Copyright: '~ts'", [C]});
%%                                                    _ ->
%%                                                        true
%%                                                end
%%                                        end, Copyrights)
%%                      end, Packages),
%%     ok.


%% Checks that every package has "filesAnalyzed" set to true. Any other
%% value fails with badmatch inside the predicate.
test_filesAnalised(#{~"packages" := Packages}) ->
    IsAnalyzed = fun (#{~"filesAnalyzed" := Analyzed}) -> Analyzed = true end,
    true = lists:all(IsAnalyzed, Packages),
    ok.

%% Checks that every package lists at least one file. On failure the
%% number of files of each package is printed before raising the error.
test_hasFiles_not_empty(#{~"packages" := Packages}) ->
    HasFiles = fun (#{~"hasFiles" := Files}) -> length(Files) > 0 end,
    PrintCount = fun (#{~"hasFiles" := Files, ~"SPDXID" := Id}) ->
                         io:format("~p: length: ~p~n", [Id, length(Files)])
                 end,
    try
        true = lists:all(HasFiles, Packages)
    catch
        _:_:_ ->
            lists:foreach(PrintCount, Packages),
            error(?FUNCTION_NAME)
    end,
    ok.

%% test_files_licenses(Input) ->
%%     ok = test_concluded_license_equals_license_in_file(Input),
%%     ok.

%% print_error(false, Input) ->
%%     io:format("[~p] ~p~n", [false, Input]),
%%     false;
%% print_error(true, _Input) ->
%%     true.

%% test_concluded_license_equals_license_in_file(#{~"files" := Files}) ->
%%     true = lists:all(fun (#{~"licenseInfoInFiles" := [License], ~"licenseConcluded" := License}) ->
%%                              true;
%%                          (#{~"licenseInfoInFiles" := [~"NONE"]}) ->
%%                              true;
%%                          (#{~"licenseInfoInFiles" := Licenses,
%%                             ~"licenseConcluded" := Concluded,
%%                             ~"SPDXID" := Id}) when length(Licenses) > 1 ->
%%                              Licenses1 = lists:map(fun erlang:binary_to_list/1, Licenses),
%%                              LicensesBin = erlang:list_to_binary(lists:join(" AND ", Licenses1)),
%%                              print_error(Concluded =:= LicensesBin, {Id, Licenses, Concluded, ?LINE});
%%                          (#{~"licenseInfoInFiles" := Licenses,
%%                             ~"licenseConcluded" := Concluded,
%%                             ~"SPDXID" := Id}) ->
%%                              print_error(Concluded =:= Licenses, {Id, Licenses, Concluded, ?LINE})
%%                      end, Files),
%%     ok.

%% Checks that every package has a homepage: either ?spdx_homepage or any
%% other non-empty value.
test_homepage(#{~"packages" := Packages}) ->
    HasHomepage = fun (#{~"homepage" := Homepage}) ->
                          Homepage == ?spdx_homepage orelse Homepage =/= <<>>
                  end,
    true = lists:all(HasHomepage, Packages),
    ok.

%% Checks that "licenseConcluded" of every package is neither empty nor
%% "NONE".
test_licenseConcluded_exists(#{~"packages" := Packages}) ->
    Missing = [~"", ~"NONE"],
    IsConcluded = fun (#{~"licenseConcluded" := License}) ->
                          not lists:member(License, Missing)
                  end,
    true = lists:all(IsConcluded, Packages),
    ok.

%% Checks that "licenseDeclared" of every package is neither empty nor
%% "NONE".
test_licenseDeclared_exists(#{~"packages" := Packages}) ->
    Missing = [~"", ~"NONE"],
    IsDeclared = fun (#{~"licenseDeclared" := License}) ->
                         not lists:member(License, Missing)
                 end,
    true = lists:all(IsDeclared, Packages),
    ok.

%% Checks the "licenseInfoFromFiles" list of every package: it must not be
%% empty, its first element must not be a list, none of its elements may
%% be an integer (which is what a plain string in place of a list of
%% binaries would contain), and it must not contain duplicates.
test_licenseInfoFromFiles_not_empty(#{~"packages" := Packages}) ->
    WellFormed =
        fun (#{~"licenseInfoFromFiles" := []}) ->
                false;
            (#{~"licenseInfoFromFiles" := [First | _]}) when is_list(First) ->
                false;
            (#{~"licenseInfoFromFiles" := Licenses}) ->
                true = lists:all(fun (License) -> not erlang:is_integer(License) end,
                                 Licenses)
        end,
    true = lists:all(WellFormed, Packages),

    %% check no duplicates
    NoDuplicates = fun (#{~"licenseInfoFromFiles" := Licenses}) ->
                           lists:uniq(Licenses) =:= Licenses
                   end,
    true = lists:all(NoDuplicates, Packages),
    ok.

%% Checks that package names are unique (apart from the known repeated
%% ones) and that every minimum OTP app is present by name.
test_package_names(#{~"packages" := Packages}) ->
    Names = lists:map(fun (#{~"name" := Name}) -> Name end, Packages),

    %% openssl is known to appear twice and Autoconf is placed in multiple
    %% packages, so both are left out of the uniqueness check.
    Skipped = [~"openssl", ~"Autoconf"],
    Checked = [Name || Name <- Names, not lists:member(Name, Skipped)],
    case lists:uniq(Checked) of
        Checked ->
            ok;
        Unique ->
            io:format("Names are not unique: ~p -- ~p", [Checked, Unique]),
            error(?FUNCTION_NAME)
    end,

    IsPresent = fun (App) -> lists:member(App, Names) end,
    true = lists:all(IsPresent, minimum_otp_apps()),
    ok.

%% Checks that every package SPDXID starts with "SPDXRef-" and that the
%% rest consists only of ASCII letters, digits and '-'.
%% A missing prefix fails with function_clause, an invalid remainder with
%% badmatch.
test_package_ids(#{~"packages" := Packages}) ->
    %% Match on alphanumeric and -
    AllowedChars = "^[a-zA-Z0-9-]*$",
    IsValidId = fun (#{~"SPDXID" := <<"SPDXRef-", Suffix/binary>>}) ->
                        match = re:run(Suffix, AllowedChars, [{capture, none}]),
                        true
                end,
    true = lists:all(IsValidId, Packages),
    ok.

%% Checks that the erts package lists none of the erts test files and none
%% of the erts documentation files; those belong to separate packages.
%% Only the folders named below are inspected.
test_erts(#{~"packages" := Packages, ~"files" := Files}) ->
    ErtsId = generate_spdxid_name(~"erts"),
    IsErts = fun (#{~"SPDXID" := SpdxId}) -> SpdxId == ErtsId end,
    {value, #{~"hasFiles" := HasFiles}} = lists:search(IsErts, Packages),

    %% Byte-wise prefix test; anything that is not a binary is not under
    %% the folder.
    IsUnder = fun (Folder, Name) ->
                      Size = byte_size(Folder),
                      case Name of
                          <<Folder:Size/binary, _/binary>> -> true;
                          _ -> false
                      end
              end,
    %% SPDX ids of all files located under any of the given folders.
    IdsUnder = fun (Folders) ->
                       [FileId || #{~"fileName" := Name, ~"SPDXID" := FileId} <- Files,
                                  lists:any(fun (Folder) -> IsUnder(Folder, Name) end, Folders)]
               end,

    %% Removing the test files must leave the list untouched, i.e. none of
    %% them is part of the erts package.
    HasFiles = HasFiles -- IdsUnder([~"erts/emulator/test/", ~"erts/test/"]),

    %% Same check for the documentation files.
    HasFiles = HasFiles -- IdsUnder([~"erts/preloaded/doc/", ~"erts/doc/"]),
    ok.

%% Checks that the verification code of every package is set, i.e. neither
%% the "TODO" placeholder nor empty.
test_verificationCode(#{~"packages" := Packages}) ->
    Unset = [~"TODO", <<>>],
    HasCode = fun (#{~"packageVerificationCode" :=
                         #{~"packageVerificationCodeValue" := Code}}) ->
                      not lists:member(Code, Unset)
              end,
    true = lists:all(HasCode, Packages),
    ok.

%% Checks that every package named after a minimum OTP app has
%% ?spdx_supplier as its supplier. Other packages are not constrained.
test_supplier_Ericsson(#{~"packages" := Packages}) ->
    OtpApps = minimum_otp_apps(),
    SupplierOk = fun (#{~"supplier" := Supplier, ~"name" := Name}) ->
                         case lists:member(Name, OtpApps) of
                             true -> Supplier == ?spdx_supplier;
                             false -> true
                         end
                 end,
    true = lists:all(SupplierOk, Packages),
    ok.

%% Checks that every package that is named after a minimum OTP app and has
%% an "originator" uses ?spdx_supplier for it. Packages without an
%% originator pass.
test_originator_Ericsson(#{~"packages" := Packages}) ->
    %% TODO: needs fixing ORT otp
    OtpApps = minimum_otp_apps(),
    OriginatorOk =
        fun (#{~"name" := Name, ~"originator" := Originator}) ->
                %% "is an OTP app" implies "originator is the supplier"
                not lists:member(Name, OtpApps) orelse Originator == ?spdx_supplier;
            (#{~"name" := _}) ->
                true
        end,
    true = lists:all(OriginatorOk, Packages),
    ok.

%% Checks that no package has an empty "versionInfo".
test_versionInfo_not_empty(#{~"packages" := Packages}) ->
    HasVersion = fun (#{~"versionInfo" := Version}) -> Version =/= ~"" end,
    true = lists:all(HasVersion, Packages),
    ok.

%% Checks that no package has an empty "downloadLocation".
test_download_location(#{~"packages" := Packages}) ->
    HasLocation = fun (#{~"downloadLocation" := Location}) -> Location =/= ~"" end,
    true = lists:all(HasLocation, Packages),
    ok.

%% Checks that no file id is listed by more than one package (or twice in
%% the same package) and that every package lists at least one file.
%% The repeated ids are printed before the error is raised.
test_package_hasFiles(#{~"packages" := Packages}) ->
    %% Later packages come first, as with a left fold that prepends.
    PerPackage = lists:map(fun (#{~"hasFiles" := FileIds}) -> FileIds end, Packages),
    AllFiles = lists:append(lists:reverse(PerPackage)),

    case lists:uniq(AllFiles) of
        AllFiles ->
            ok;
        Unique ->
            io:format("~p~n", [AllFiles -- Unique]),
            error(?FUNCTION_NAME)
    end,

    %% every package contains at least one file
    NonEmpty = fun (#{~"hasFiles" := FileIds}) -> erlang:length(FileIds) > 0 end,
    true = lists:all(NonEmpty, Packages),
    ok.

%% Checks that the project package has exactly one external reference and
%% that it equals ?spdx_project_purl with the locator set to the github
%% purl for the project's version.
test_project_purl(#{~"documentDescribes" := [ProjectName], ~"packages" := Packages}) ->
    IsProject = fun (#{~"SPDXID" := Id}) -> ProjectName == Id end,
    [#{~"externalRefs" := [Purl], ~"versionInfo" := VersionInfo}] =
        lists:filter(IsProject, Packages),
    Template = ?spdx_project_purl,
    Locator = <<"pkg:github/erlang/otp@", VersionInfo/binary>>,
    true = Purl == Template#{ ~"referenceLocator" := Locator },
    ok.

%% Checks the purl of every package named after a minimum OTP app (the
%% project package excluded): apart from its comment, the single external
%% reference must equal the one generated from the package name and
%% version, and its locator must contain ?spdx_purl_meta_data.
test_packages_purl(#{~"documentDescribes" := [ProjectName], ~"packages" := Packages}) ->
    OtpApps = minimum_otp_apps(),
    IsOtpApp = fun (#{~"SPDXID" := Id, ~"name" := Name}) ->
                       ProjectName =/= Id andalso lists:member(Name, OtpApps)
               end,
    WithoutComment = fun (Ref) -> maps:remove(~"comment", Ref) end,
    PurlOk =
        fun (#{~"name" := Name, ~"versionInfo" := Version,
               ~"externalRefs" := [#{~"referenceLocator" := Locator} = Ref]}) ->
                Expected = create_externalRef_purl(~"", otp_purl(Name, Version)),

                %% check expected external ref
                WithoutComment(Expected) =:= WithoutComment(Ref) andalso
                    %% check metadata is included in purl
                    string:find(Locator, ?spdx_purl_meta_data) =/= nomatch
        end,
    true = lists:all(PurlOk, lists:filter(IsOtpApp, Packages)),
    ok.

%% Runs both vendor checks: the required vendor packages are present, and
%% the relationships of vendor packages are valid.
test_vendor_packages(Sbom) ->
    ok = minimum_vendor_packages(Sbom),
    ok = vendor_relations(Sbom).

%% Checks that every name returned by minimum_vendor_packages/0 is the
%% name of some package in the SBOM.
minimum_vendor_packages(#{~"packages" := Packages}) ->
    Required = minimum_vendor_packages(),
    Present = lists:map(fun (#{~"name" := Name}) -> Name end, Packages),
    true = [] == Required -- Present,
    ok.

%% Checks the relationships whose source ("spdxElementId") is a vendor
%% package: the related element must be an existing package and must not be
%% the vendor package itself. Relationships of other packages are ignored.
%%
%% A vendor package is one whose "comment" is exactly " vendor package".
%% The comment is matched as a binary, like every other SBOM value matched
%% in this file. The previous version matched a list string, which a binary
%% value can never equal, so no vendor package was selected and the check
%% passed vacuously.
%% NOTE(review): the exact comment text, including the leading space, is
%% taken from the original pattern; confirm it against the code that
%% writes vendor packages.
vendor_relations(#{~"packages" := Packages, ~"relationships" := Relations}) ->
    PackageIds = lists:map(fun (#{~"SPDXID" := Id}) -> Id end, Packages),
    VendorIds = lists:filtermap(fun (#{~"comment" := ~" vendor package", ~"SPDXID" := Id}) -> {true, Id} ;
                                      (_) -> false
                                  end, Packages),
    true = lists:all(fun (#{~"relatedSpdxElement" := Related,
                            ~"relationshipType"   := _,
                            ~"spdxElementId" := PackageId}) ->
                             case lists:member(PackageId, VendorIds) of
                                 true ->
                                     lists:member(Related, PackageIds) andalso
                                         PackageId =/= Related ;
                                 false ->
                                     %% ignore non-vendor relations
                                     true
                             end
                     end, Relations),
    ok.

%% Checks every relationship in the SBOM: its type must be one of the
%% allowed ones, both ends must be existing packages, the ends must differ,
%% and the source must not be the project itself. Offending relationships
%% are printed. Finally, a fixed set of known vendor relationships must be
%% present.
test_package_relations(#{~"packages" := Packages}=Spdx) ->
    PackageIds = lists:map(fun (#{~"SPDXID" := Id}) -> Id end, Packages),
    Relations = maps:get(~"relationships", Spdx),
    AllowedTypes = [~"PACKAGE_OF", ~"DEPENDS_ON", ~"TEST_OF",
                    ~"OPTIONAL_DEPENDENCY_OF", ~"DOCUMENTATION_OF"],
    IsValid =
        fun (#{~"relatedSpdxElement" := Related,
               ~"relationshipType"   := Type,
               ~"spdxElementId"      := PackageId} = Relation) ->
                Valid = lists:member(Type, AllowedTypes) andalso
                    lists:member(Related, PackageIds) andalso
                    lists:member(PackageId, PackageIds) andalso
                    PackageId =/= Related andalso
                    PackageId =/= ?spdxref_project_name,
                Valid orelse
                    begin
                        io:format("Error in relation: ~p~n", [Relation]),
                        false
                    end
        end,
    true = lists:all(IsValid, Relations),

    %% known special cases: {package, the package it is a PACKAGE_OF}
    PackageOf = fun ({Child, Parent}) ->
                        #{~"relatedSpdxElement" => Parent,
                          ~"relationshipType" => ~"PACKAGE_OF",
                          ~"spdxElementId" => Child}
                end,
    SpecialCases =
        lists:map(PackageOf,
                  [{~"SPDXRef-otp-erlinterface-openssl", ~"SPDXRef-otp-erlinterface"},
                   {~"SPDXRef-otp-stdlib-test-json-suite", ~"SPDXRef-otp-stdlib-test"},
                   {~"SPDXRef-otp-stdlib-unicode", ~"SPDXRef-otp-stdlib"},
                   {~"SPDXRef-otp-commontest-tablesorter", ~"SPDXRef-otp-commontest"},
                   {~"SPDXRef-otp-commontest-jquery", ~"SPDXRef-otp-commontest"}]),
    true = lists:all(fun (Case) -> lists:member(Case, Relations) end, SpecialCases),
    ok.

%% Checks that every extracted licensing info of the document is
%% referenced by some package: its "licenseId" must appear among the
%% "LicenseRef-" entries of the packages' "licenseInfoFromFiles".
test_has_extracted_licenses(#{~"hasExtractedLicensingInfos" := LicensesInfo,
                              ~"packages" := Packages}) ->
    IsLicenseRef = fun (<<"LicenseRef-", _/binary>>) -> true;
                       (_) -> false
                   end,
    RefsOf = fun (#{~"licenseInfoFromFiles" := InfoFromFiles}) ->
                     lists:filter(IsLicenseRef, InfoFromFiles)
             end,
    %% Only membership is tested below, so the order of the collected
    %% references does not matter.
    UsedRefs = lists:uniq(lists:flatmap(RefsOf, Packages)),
    IsUsed = fun (#{~"licenseId" := LicenseId}) -> lists:member(LicenseId, UsedRefs) end,
    true = lists:all(IsUsed, LicensesInfo),
    ok.

%% Reads every file matching LICENSES/LicenseRef*.txt (relative to the
%% current working directory) and returns a list of {LicenseId, Text}
%% pairs, where LicenseId is the file name without directory and extension.
%% Fails with badmatch when a matched file cannot be read.
extracted_license_info() ->
    ReadLicense =
        fun (Path) ->
                {ok, Text} = file:read_file(Path),
                LicenseId = filename:basename(filename:rootname(Path)),
                {unicode:characters_to_binary(LicenseId), Text}
        end,
    lists:map(ReadLicense, filelib:wildcard("LICENSES/LicenseRef*.txt")).

%%
%% REUSE-IgnoreEnd
%%
