From 556b20e7c392322972cd084d03dcf2413697ceed Mon Sep 17 00:00:00 2001 From: gaaclarke <30870216+gaaclarke@users.noreply.github.com> Date: Wed, 18 Jun 2025 13:47:39 -0700 Subject: [PATCH] License cpp jun16 (#170716) The big additions here are: 1) Checking against the catalog of known licenses 1) Parsing of the known catalogs from disk 1) Nicer error messages With this PR we are getting pretty close to feature parity. The big changes left are updating the contents of the data directory so we match what we want. Execution across the directory is still around 35s. Known extra work: 1) Propagate the `data` directory 1) Make sure the auditable output is looking as we want it 1) Make sure to treat the root directory differently where we want to enforce the presence of the file copyright 1) Remove hardcoded name "engine" for the root directory (that means shifting all the integration tests to run in an `engine` directory). 1) Maybe clean up the file format for the known licenses? ## Pre-launch Checklist - [x] I read the [Contributor Guide] and followed the process outlined there for submitting PRs. - [x] I read the [Tree Hygiene] wiki page, which explains my responsibilities. - [x] I read and followed the [Flutter Style Guide], including [Features we expect every widget to implement]. - [x] I signed the [CLA]. - [x] I listed at least one issue that this PR fixes in the description above. - [x] I updated/added relevant documentation (doc comments with `///`). - [x] I added new tests to check the change I am making, or this PR is [test-exempt]. - [x] I followed the [breaking change policy] and added [Data Driven Fixes] where supported. - [x] All existing and new tests are passing. If you need help, consider asking for advice on the #hackers-new channel on [Discord]. 
[Contributor Guide]: https://github.com/flutter/flutter/blob/main/docs/contributing/Tree-hygiene.md#overview [Tree Hygiene]: https://github.com/flutter/flutter/blob/main/docs/contributing/Tree-hygiene.md [test-exempt]: https://github.com/flutter/flutter/blob/main/docs/contributing/Tree-hygiene.md#tests [Flutter Style Guide]: https://github.com/flutter/flutter/blob/main/docs/contributing/Style-guide-for-Flutter-repo.md [Features we expect every widget to implement]: https://github.com/flutter/flutter/blob/main/docs/contributing/Style-guide-for-Flutter-repo.md#features-we-expect-every-widget-to-implement [CLA]: https://cla.developers.google.com/ [flutter/tests]: https://github.com/flutter/tests [breaking change policy]: https://github.com/flutter/flutter/blob/main/docs/contributing/Tree-hygiene.md#handling-breaking-changes [Discord]: https://github.com/flutter/flutter/blob/main/docs/contributing/Chat.md [Data Driven Fixes]: https://github.com/flutter/flutter/blob/main/docs/contributing/Data-driven-Fixes.md --- .../licenses_cpp/data/licenses/google.txt | 31 ++++ .../data/licenses/google_commented.txt | 29 ++++ .../flutter/tools/licenses_cpp/src/catalog.cc | 111 ++++++++++++-- .../flutter/tools/licenses_cpp/src/catalog.h | 27 +++- .../licenses_cpp/src/catalog_unittests.cc | 105 ++++++++++++- .../flutter/tools/licenses_cpp/src/data.cc | 10 +- .../src/flutter/tools/licenses_cpp/src/data.h | 2 + .../tools/licenses_cpp/src/license_checker.cc | 95 ++++++++---- .../src/license_checker_unittests.cc | 145 +++++++++++++++++- 9 files changed, 489 insertions(+), 66 deletions(-) create mode 100644 engine/src/flutter/tools/licenses_cpp/data/licenses/google.txt create mode 100644 engine/src/flutter/tools/licenses_cpp/data/licenses/google_commented.txt diff --git a/engine/src/flutter/tools/licenses_cpp/data/licenses/google.txt b/engine/src/flutter/tools/licenses_cpp/data/licenses/google.txt new file mode 100644 index 00000000000..97a15327a8c --- /dev/null +++ 
b/engine/src/flutter/tools/licenses_cpp/data/licenses/google.txt @@ -0,0 +1,31 @@ +google +^Copyright \(c\) \d+ Google Inc +Copyright \(c\) \d+ Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + \* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + \* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + \* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \(INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION\) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +\(INCLUDING NEGLIGENCE OR OTHERWISE\) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/engine/src/flutter/tools/licenses_cpp/data/licenses/google_commented.txt b/engine/src/flutter/tools/licenses_cpp/data/licenses/google_commented.txt new file mode 100644 index 00000000000..8083e56ac4a --- /dev/null +++ b/engine/src/flutter/tools/licenses_cpp/data/licenses/google_commented.txt @@ -0,0 +1,29 @@ +google commented +^// Copyright \(c\) \d+ Google Inc +// Copyright \(c\) \d+ Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// \* Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// \* Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// \* Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \(INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION\) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// \(INCLUDING NEGLIGENCE OR OTHERWISE\) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/engine/src/flutter/tools/licenses_cpp/src/catalog.cc b/engine/src/flutter/tools/licenses_cpp/src/catalog.cc index 63672f6cc43..801a3a6e675 100644 --- a/engine/src/flutter/tools/licenses_cpp/src/catalog.cc +++ b/engine/src/flutter/tools/licenses_cpp/src/catalog.cc @@ -4,28 +4,77 @@ #include "flutter/tools/licenses_cpp/src/catalog.h" -absl::StatusOr Catalog::Open(std::string_view data_dir) { - return absl::UnimplementedError(""); -} +#include + +namespace fs = std::filesystem; + +absl::StatusOr Catalog::Open(std::string_view data_dir) { + fs::path data_dir_path(data_dir); + if (!fs::exists(data_dir_path)) { + return absl::InvalidArgumentError( + absl::StrCat("Data directory doesn't exist ", data_dir)); + } + fs::path licenses_path = data_dir_path / "licenses"; + if (!fs::exists(licenses_path)) { + return absl::InvalidArgumentError(absl::StrCat( + "Licenses directory doesn't exist ", licenses_path.string())); + } -absl::StatusOr Catalog::Make( - const std::vector>& entries) { RE2::Set selector(RE2::Options(), RE2::Anchor::UNANCHORED); std::vector> matchers; std::vector names; - for (const std::vector& entry : entries) { - if (entry.size() != 3) { - return absl::InvalidArgumentError("Entry doesn't have 3 items"); + for (const fs::path& file : fs::directory_iterator(licenses_path)) { + std::ifstream infile(file.string()); + if (!infile.good()) { + return absl::InvalidArgumentError("Unable to open file " + file.string()); } + + absl::StatusOr entry = ParseEntry(infile); + if (!entry.ok()) { + return absl::InvalidArgumentError( + absl::StrCat("Unable to parse data entry at ", file.string(), " : ", + entry.status())); + } + std::string err; - names.push_back(std::string(entry[0])); - int idx = selector.Add(entry[1], &err); + selector.Add(entry->unique, &err); + if (!err.empty()) { + return absl::InvalidArgumentError(absl::StrCat( + "Unable to add unique key from ", file.string(), " : ", err)); + } + names.emplace_back(std::move(entry->name)); + + auto 
matcher_re2 = std::make_unique(entry->matcher); + if (!matcher_re2) { + return absl::InvalidArgumentError("Unable to make matcher."); + } + + matchers.emplace_back(std::move(matcher_re2)); + } + + bool did_compile = selector.Compile(); + if (!did_compile) { + return absl::UnknownError("Unable to compile selector."); + } + + return Catalog(std::move(selector), std::move(matchers), std::move(names)); +} + +absl::StatusOr Catalog::Make(const std::vector& entries) { + RE2::Set selector(RE2::Options(), RE2::Anchor::UNANCHORED); + std::vector> matchers; + std::vector names; + + for (const Entry& entry : entries) { + std::string err; + names.push_back(std::string(entry.name)); + int idx = selector.Add(entry.unique, &err); if (idx < 0) { return absl::InvalidArgumentError( - absl::StrCat("Unable to add set entry: ", entry[1], " ", err)); + absl::StrCat("Unable to add set entry: ", entry.unique, " ", err)); } - matchers.push_back(std::make_unique(entry[2])); + matchers.push_back(std::make_unique(entry.matcher)); } bool did_compile = selector.Compile(); @@ -42,7 +91,8 @@ Catalog::Catalog(RE2::Set selector, matchers_(std::move(matchers)), names_(std::move(names)) {} -absl::StatusOr Catalog::FindMatch(std::string_view query) { +absl::StatusOr Catalog::FindMatch( + std::string_view query) const { std::vector selector_results; if (!selector_.Match(query, &selector_results)) { return absl::NotFoundError("Selector didn't match."); @@ -56,10 +106,39 @@ absl::StatusOr Catalog::FindMatch(std::string_view query) { return absl::InvalidArgumentError(ss.str()); } + std::string_view match_text; + RE2* matcher = matchers_[selector_results[0]].get(); if (selector_results.size() == 1 && - RE2::FullMatch(query, *matchers_[selector_results[0]])) { - return names_[selector_results[0]]; + matcher->Match(query, 0, query.length(), RE2::Anchor::UNANCHORED, + &match_text, + /*nsubmatch=*/1)) { + return Match{.matcher = names_[selector_results[0]], + .matched_text = match_text}; } else { - return 
absl::NotFoundError("Selection didn't match."); + return absl::NotFoundError(absl::StrCat( + "Selected matcher (", names_[selector_results[0]], ") didn't match.")); } } + +absl::StatusOr Catalog::ParseEntry(std::istream& is) { + if (!is.good()) { + return absl::InvalidArgumentError("Bad stream."); + } + std::string name; + std::getline(is, name); + if (is.eof()) { + return absl::InvalidArgumentError("Bad stream."); + } + std::string unique; + std::getline(is, unique); + if (is.eof()) { + return absl::InvalidArgumentError("Bad stream."); + } + + std::string matcher_text((std::istreambuf_iterator(is)), + std::istreambuf_iterator()); + + return Catalog::Entry{.name = std::move(name), + .unique = std::move(unique), + .matcher = std::move(matcher_text)}; +} diff --git a/engine/src/flutter/tools/licenses_cpp/src/catalog.h b/engine/src/flutter/tools/licenses_cpp/src/catalog.h index 5235cbd1a06..80104d2e1ea 100644 --- a/engine/src/flutter/tools/licenses_cpp/src/catalog.h +++ b/engine/src/flutter/tools/licenses_cpp/src/catalog.h @@ -10,6 +10,7 @@ #include "flutter/third_party/re2/re2/re2.h" #include "flutter/third_party/re2/re2/set.h" +#include #include /// A storage of licenses that can be matched against. @@ -19,20 +20,32 @@ /// that. This approach was chosen to minimize the size of the RE2::Set. class Catalog { public: + /// VisibleForTesting + struct Entry { + std::string name; + std::string unique; + std::string matcher; + }; + + struct Match { + std::string_view matcher; + std::string_view matched_text; + }; + static absl::StatusOr Open(std::string_view data_dir); /// Make a Catalog for testing. - /// The format is [[, , ]*] where the unique - /// regex should only match one license. - static absl::StatusOr Make( - const std::vector>& entries); + static absl::StatusOr Make(const std::vector& entries); /// @brief Tries to identify a match for the `query` across the `Catalog`. - /// @param query The text that will be matched against. 
@return - /// absl::StatusCode::kNotFound when a match can't be found. + /// @param query The text that will be matched against. + /// @return absl::StatusCode::kNotFound when a match can't be found. /// absl::StatusCode::kInvalidArgument if more than one match comes up from /// the selector. - absl::StatusOr FindMatch(std::string_view query); + absl::StatusOr FindMatch(std::string_view query) const; + + /// VisibleForTesting + static absl::StatusOr ParseEntry(std::istream& is); private: explicit Catalog(RE2::Set selector, diff --git a/engine/src/flutter/tools/licenses_cpp/src/catalog_unittests.cc b/engine/src/flutter/tools/licenses_cpp/src/catalog_unittests.cc index f7df64fc045..00e9932508d 100644 --- a/engine/src/flutter/tools/licenses_cpp/src/catalog_unittests.cc +++ b/engine/src/flutter/tools/licenses_cpp/src/catalog_unittests.cc @@ -4,20 +4,85 @@ #include "flutter/tools/licenses_cpp/src/catalog.h" #include "gtest/gtest.h" +static const char* kEntry = R"entry(google +Copyright \(c\) \d+ Google Inc +Copyright \(c\) \d+ Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + \* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + \* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + \* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \(INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION\) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +\(INCLUDING NEGLIGENCE OR OTHERWISE\) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +)entry"; + +static const char* kSkiaLicense = + R"entry(Copyright (c) 2011 Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +)entry"; + TEST(CatalogTest, Simple) { absl::StatusOr catalog = Catalog::Make({{"foobar", ".*foo.*", ".*foo.*"}}); ASSERT_TRUE(catalog.ok()); - absl::StatusOr match = catalog->FindMatch("foo"); + absl::StatusOr match = catalog->FindMatch("foo"); ASSERT_TRUE(match.ok()); - ASSERT_EQ(*match, "foobar"); + ASSERT_EQ(match->matcher, "foobar"); } TEST(CatalogTest, MultipleMatch) { absl::StatusOr catalog = Catalog::Make({{"foobar", ".*foo.*", ""}, {"oo", ".*oo.*", ""}}); ASSERT_TRUE(catalog.ok()) << catalog.status(); - absl::StatusOr has_match = catalog->FindMatch("foo"); + absl::StatusOr has_match = catalog->FindMatch("foo"); ASSERT_FALSE(has_match.ok()); ASSERT_TRUE(RE2::PartialMatch(has_match.status().message(), "Multiple unique matches found")) @@ -30,7 +95,7 @@ TEST(CatalogTest, NoSelectorMatch) { absl::StatusOr catalog = Catalog::Make({{"foobar", ".*bar.*", ".*foo.*"}}); ASSERT_TRUE(catalog.ok()); - absl::StatusOr match = catalog->FindMatch("foo"); + absl::StatusOr match = catalog->FindMatch("foo"); ASSERT_FALSE(match.ok()); ASSERT_EQ(match.status().code(), absl::StatusCode::kNotFound); } @@ -39,7 +104,37 @@ TEST(CatalogTest, NoSelectionMatch) { absl::StatusOr catalog = Catalog::Make({{"foobar", ".*foo.*", ".*bar.*"}}); ASSERT_TRUE(catalog.ok()); - absl::StatusOr match = catalog->FindMatch("foo"); + absl::StatusOr match = catalog->FindMatch("foo"); ASSERT_FALSE(match.ok()); ASSERT_EQ(match.status().code(), absl::StatusCode::kNotFound); } + 
+TEST(CatalogTest, SimpleParseEntry) { + std::stringstream ss; + ss << "foobar\n"; + ss << "unique\n"; + ss << R"match(Multiline +matcher +.*)match"; + + absl::StatusOr entry = Catalog::ParseEntry(ss); + EXPECT_TRUE(entry.ok()) << entry.status(); + if (entry.ok()) { + EXPECT_EQ(entry->name, "foobar"); + EXPECT_EQ(entry->unique, "unique"); + EXPECT_EQ(entry->matcher, R"match(Multiline +matcher +.*)match"); + } +} + +TEST(CatalogTest, SkiaLicense) { + std::stringstream ss; + ss << kEntry; + absl::StatusOr entry = Catalog::ParseEntry(ss); + ASSERT_TRUE(entry.ok()) << entry.status(); + absl::StatusOr catalog = Catalog::Make({*entry}); + ASSERT_TRUE(catalog.ok()); + absl::StatusOr match = catalog->FindMatch(kSkiaLicense); + EXPECT_TRUE(match.ok()) << match.status(); +} diff --git a/engine/src/flutter/tools/licenses_cpp/src/data.cc b/engine/src/flutter/tools/licenses_cpp/src/data.cc index 4b6ca244feb..f3f2d3a7a19 100644 --- a/engine/src/flutter/tools/licenses_cpp/src/data.cc +++ b/engine/src/flutter/tools/licenses_cpp/src/data.cc @@ -26,6 +26,14 @@ absl::StatusOr Data::Open(std::string_view data_dir) { exclude_path.string() + ": " + include_filter.status().ToString()); } + absl::StatusOr catalog = Catalog::Open(data_dir); + if (!catalog.ok()) { + return absl::InvalidArgumentError("Can't open catalog at " + + exclude_path.string() + ": " + + catalog.status().ToString()); + } + return Data{.include_filter = std::move(*include_filter), - .exclude_filter = std::move(*exclude_filter)}; + .exclude_filter = std::move(*exclude_filter), + .catalog = std::move(*catalog)}; } diff --git a/engine/src/flutter/tools/licenses_cpp/src/data.h b/engine/src/flutter/tools/licenses_cpp/src/data.h index 2c72bc0dc24..4ce4c4e7cbe 100644 --- a/engine/src/flutter/tools/licenses_cpp/src/data.h +++ b/engine/src/flutter/tools/licenses_cpp/src/data.h @@ -6,6 +6,7 @@ #define FLUTTER_TOOLS_LICENSES_CPP_SRC_DATA_H_ #include "flutter/third_party/abseil-cpp/absl/status/statusor.h" +#include 
"flutter/tools/licenses_cpp/src/catalog.h" #include "flutter/tools/licenses_cpp/src/filter.h" /// In memory representation of the contents of the data directory @@ -15,6 +16,7 @@ struct Data { static absl::StatusOr Open(std::string_view data_dir); Filter include_filter; Filter exclude_filter; + Catalog catalog; }; #endif // FLUTTER_TOOLS_LICENSES_CPP_SRC_DATA_H_ diff --git a/engine/src/flutter/tools/licenses_cpp/src/license_checker.cc b/engine/src/flutter/tools/licenses_cpp/src/license_checker.cc index 5ba6ea59e93..e262c60add0 100644 --- a/engine/src/flutter/tools/licenses_cpp/src/license_checker.cc +++ b/engine/src/flutter/tools/licenses_cpp/src/license_checker.cc @@ -152,17 +152,6 @@ Package GetPackage(const fs::path& working_dir, const fs::path& full_path) { return result; } -std::string ReadFile(const fs::path& path) { - std::ifstream stream(path); - assert(stream.good()); - std::string license((std::istreambuf_iterator(stream)), - std::istreambuf_iterator()); - if (license[license.size() - 1] == '\n') { - license.pop_back(); - } - return license; -} - class LicenseMap { public: void Add(std::string_view package, std::string_view license) { @@ -176,13 +165,6 @@ class LicenseMap { } } - void AddFile(std::string_view package, const fs::path& path) { - if (!license_files_.contains(path.string())) { - Add(package, ReadFile(path)); - license_files_.insert(path); - } - } - void Write(std::ostream& licenses) { LicensesWriter writer(licenses); for (const auto& comment_entry : map_) { @@ -194,6 +176,40 @@ class LicenseMap { absl::btree_map> map_; absl::flat_hash_set license_files_; }; + +/// Checks the a license against known licenses and potentially adds it to the +/// license map. +/// @param path Path of the license file to check. +/// @param package Package the license file belongs to. +/// @param data The Data catalog of known licenses. +/// @param license_map The LicenseMap tracking seen licenses. 
+/// @return OkStatus if the license is known and successfully written to the +/// catalog. +absl::Status MatchLicenseFile(const fs::path& path, + const Package& package, + const Data& data, + LicenseMap* license_map) { + if (!package.license_file.has_value()) { + return absl::InvalidArgumentError("No license file."); + } + absl::StatusOr license = MMapFile::Make(path.string()); + if (!license.ok()) { + return license.status(); + } else { + absl::StatusOr match = data.catalog.FindMatch( + std::string_view(license->GetData(), license->GetSize())); + + if (match.ok()) { + license_map->Add(package.name, match->matched_text); + } else { + return absl::NotFoundError(absl::StrCat("Unknown license in ", + package.license_file->string(), + " : ", match.status().message())); + } + } + return absl::OkStatus(); +} + } // namespace std::vector LicenseChecker::Run(std::string_view working_dir, @@ -207,6 +223,7 @@ std::vector LicenseChecker::Run(std::string_view working_dir, size_t count = 0; LicenseMap license_map; + absl::flat_hash_set seen_license_files; for (const fs::path& git_repo : git_repos) { if (IsStdoutTerminal()) { PrintProgress(count++, git_repos.size()); @@ -228,7 +245,15 @@ std::vector LicenseChecker::Run(std::string_view working_dir, Package package = GetPackage(working_dir_path, full_path); if (package.license_file.has_value()) { - license_map.AddFile(package.name, package.license_file.value()); + auto [_, is_new_item] = + seen_license_files.insert(package.license_file.value()); + if (is_new_item) { + absl::Status match_status = MatchLicenseFile( + package.license_file.value(), package, data, &license_map); + if (!match_status.ok()) { + errors.emplace_back(std::move(match_status)); + } + } } VLOG(1) << full_path.string(); @@ -243,18 +268,26 @@ std::vector LicenseChecker::Run(std::string_view working_dir, return errors; } } - IterateComments(file->GetData(), file->GetSize(), - [&](std::string_view comment) { - VLOG(2) << comment; - re2::StringPiece match; - if 
(RE2::PartialMatch(comment, pattern, &match)) { - did_find_copyright = true; - VLOG(1) << comment; - if (!package.license_file.has_value()) { - license_map.Add(package.name, comment); - } - } - }); + IterateComments( + file->GetData(), file->GetSize(), [&](std::string_view comment) { + VLOG(2) << comment; + re2::StringPiece match; + if (RE2::PartialMatch(comment, pattern, &match)) { + did_find_copyright = true; + VLOG(1) << comment; + if (!package.license_file.has_value()) { + absl::StatusOr match = + data.catalog.FindMatch(comment); + if (match.ok()) { + license_map.Add(package.name, match->matched_text); + } else { + errors.emplace_back(absl::NotFoundError( + absl::StrCat("Unknown license in ", full_path.string(), + " : ", match.status().message()))); + } + } + } + }); if (!did_find_copyright && !package.license_file.has_value()) { errors.push_back( absl::NotFoundError("Expected copyright in " + full_path.string())); diff --git a/engine/src/flutter/tools/licenses_cpp/src/license_checker_unittests.cc b/engine/src/flutter/tools/licenses_cpp/src/license_checker_unittests.cc index d4957cfc11f..fa3658234aa 100644 --- a/engine/src/flutter/tools/licenses_cpp/src/license_checker_unittests.cc +++ b/engine/src/flutter/tools/licenses_cpp/src/license_checker_unittests.cc @@ -56,6 +56,13 @@ void main() { } )header"; +const char* kUnknownHeader = R"header( +// Unknown Copyright + +void main() { +} +)header"; + const char* kCHeader = R"header( /* C Copyright Test @@ -69,6 +76,11 @@ const char* kLicense = R"lic(Test License v2.0 )lic"; +const char* kUnknownLicense = R"lic(Unknown License +2025 +v2.0 +)lic"; + absl::StatusOr MakeTestData() { std::stringstream include; include << ".*\\.cc" << std::endl; @@ -82,19 +94,29 @@ absl::StatusOr MakeTestData() { if (!exclude_filter.ok()) { return exclude_filter.status(); } + + absl::StatusOr catalog = + Catalog::Make({{"test", "Test License", R"lic(Test License +v\d\.\d)lic"}, + {"header", "Copyright Test", "(?:C )?Copyright Test"}}); + 
if (!catalog.ok()) { + return catalog.status(); + } + return Data{ .include_filter = std::move(*include_filter), .exclude_filter = std::move(*exclude_filter), + .catalog = std::move(catalog.value()), }; } -absl::Status WriteFile(const char* data, const fs::path& path) { +absl::Status WriteFile(std::string_view data, const fs::path& path) { std::ofstream of; of.open(path.string(), std::ios::binary); if (!of.good()) { return absl::InternalError("can't open file"); } - of.write(data, std::strlen(data)); + of.write(data.data(), data.length()); of.close(); return absl::OkStatus(); } @@ -152,7 +174,56 @@ TEST_F(LicenseCheckerTest, SimplePass) { std::stringstream ss; std::vector errors = LicenseChecker::Run(temp_path->string(), ss, *data); - EXPECT_EQ(errors.size(), 0u); + EXPECT_EQ(errors.size(), 0u) << errors[0]; +} + +TEST_F(LicenseCheckerTest, UnknownFileLicense) { + absl::StatusOr temp_path = MakeTempDir(); + ASSERT_TRUE(temp_path.ok()); + + absl::StatusOr data = MakeTestData(); + ASSERT_TRUE(data.ok()); + + fs::current_path(*temp_path); + ASSERT_TRUE(WriteFile(kUnknownHeader, *temp_path / "main.cc").ok()); + Repo repo; + repo.Add(*temp_path / "main.cc"); + ASSERT_TRUE(repo.Commit().ok()); + + std::stringstream ss; + std::vector errors = + LicenseChecker::Run(temp_path->string(), ss, *data); + EXPECT_EQ(errors.size(), 1u); + EXPECT_TRUE(FindError(errors, absl::StatusCode::kNotFound, + "Unknown license in.*main.cc")) + << errors[0]; +} + +TEST_F(LicenseCheckerTest, UnknownLicense) { + absl::StatusOr temp_path = MakeTempDir(); + ASSERT_TRUE(temp_path.ok()); + + absl::StatusOr data = MakeTestData(); + ASSERT_TRUE(data.ok()); + + fs::current_path(*temp_path); + ASSERT_TRUE(WriteFile(kHeader, *temp_path / "main.cc").ok()); + // Make sure the error is only reported once. 
+ ASSERT_TRUE(WriteFile(kHeader, *temp_path / "foo.cc").ok()); + ASSERT_TRUE(WriteFile(kUnknownLicense, *temp_path / "LICENSE").ok()); + Repo repo; + repo.Add(*temp_path / "main.cc"); + repo.Add(*temp_path / "foo.cc"); + repo.Add(*temp_path / "LICENSE"); + ASSERT_TRUE(repo.Commit().ok()); + + std::stringstream ss; + std::vector errors = + LicenseChecker::Run(temp_path->string(), ss, *data); + EXPECT_EQ(errors.size(), 1u); + EXPECT_TRUE(FindError(errors, absl::StatusCode::kNotFound, + "Unknown license in.*LICENSE")) + << errors[0]; } TEST_F(LicenseCheckerTest, SimpleMissingFileLicense) { @@ -192,7 +263,7 @@ TEST_F(LicenseCheckerTest, SimpleWritesFileLicensesFile) { std::stringstream ss; std::vector errors = LicenseChecker::Run(temp_path->string(), ss, *data); - EXPECT_EQ(errors.size(), 0u); + EXPECT_EQ(errors.size(), 0u) << errors[0]; EXPECT_EQ(ss.str(), R"output(engine @@ -223,7 +294,6 @@ TEST_F(LicenseCheckerTest, SimpleWritesTwoFileLicensesFiles) { EXPECT_EQ(ss.str(), R"output(engine C Copyright Test - -------------------------------------------------------------------------------- engine @@ -313,7 +383,7 @@ v2.0 )output"); } -TEST_F(LicenseCheckerTest, ThirdyPartyDirectoryLicense) { +TEST_F(LicenseCheckerTest, ThirdPartyDirectoryLicense) { absl::StatusOr temp_path = MakeTempDir(); ASSERT_TRUE(temp_path.ok()); @@ -348,3 +418,66 @@ Test License v2.0 )output"); } + +TEST_F(LicenseCheckerTest, OnlyPrintMatch) { + absl::StatusOr temp_path = MakeTempDir(); + ASSERT_TRUE(temp_path.ok()); + + absl::StatusOr data = MakeTestData(); + ASSERT_TRUE(data.ok()); + + fs::current_path(*temp_path); + ASSERT_TRUE(WriteFile(kHeader, *temp_path / "main.cc").ok()); + ASSERT_TRUE(WriteFile(absl::StrCat(kLicense, "\n----------------------\n"), + *temp_path / "LICENSE") + .ok()); + Repo repo; + repo.Add(*temp_path / "main.cc"); + repo.Add(*temp_path / "LICENSE"); + ASSERT_TRUE(repo.Commit().ok()); + + std::stringstream ss; + std::vector errors = + 
LicenseChecker::Run(temp_path->string(), ss, *data); + EXPECT_EQ(errors.size(), 0u) << errors[0]; + + EXPECT_EQ(ss.str(), R"output(engine + +Test License +v2.0 +)output"); +} + +TEST_F(LicenseCheckerTest, OnlyPrintMatchHeader) { + absl::StatusOr temp_path = MakeTempDir(); + ASSERT_TRUE(temp_path.ok()); + + absl::StatusOr data = MakeTestData(); + ASSERT_TRUE(data.ok()); + + fs::current_path(*temp_path); + ASSERT_TRUE(WriteFile(R"header( +// Extra text. +// Copyright Test +// +// Extra text. + +void main() { +} +)header", + *temp_path / "main.cc") + .ok()); + Repo repo; + repo.Add(*temp_path / "main.cc"); + ASSERT_TRUE(repo.Commit().ok()); + + std::stringstream ss; + std::vector errors = + LicenseChecker::Run(temp_path->string(), ss, *data); + EXPECT_EQ(errors.size(), 0u) << errors[0]; + + EXPECT_EQ(ss.str(), R"output(engine + +Copyright Test +)output"); +}