Osv-scanner integration (flutter/engine#49470)

This change integrates directly with osv-scanner rather than using the OSV API to query each dependency for vulnerabilities.

- use [lockfile format](https://github.com/flutter/engine/pull/49203) for third party deps rather than a flat file
- let osv-scanner do the work of checking for vulns rather than API calls to OSV database
- let osv-scanner create and populate the SARIF results 

A successful run of the vulnerability scanner can be seen in the tests of this PR.

*If you had to change anything in the [flutter/tests] repo, include a link to the migration guide as per the [breaking change policy].*

[C++, Objective-C, Java style guides]: https://github.com/flutter/engine/blob/main/CONTRIBUTING.md#style
This commit is contained in:
Jesse Seales 2024-01-11 18:59:47 +00:00 committed by GitHub
parent 4610aea866
commit 4fa8327ffe
4 changed files with 271 additions and 362 deletions

View File

@ -2,8 +2,6 @@ name: Third party deps scan
on:
# Only the default branch is supported.
branch_protection_rule:
push:
branches: [ main ]
pull_request:
types: [ labeled ]
@ -11,18 +9,14 @@ on:
permissions: read-all
jobs:
vuln-scan:
name: Vulnerability scanning
extract-deps:
name: Extract Dependencies
runs-on: ubuntu-20.04
# run on flutter/engine push to main or PRs with 'vulnerability patch' label
if: ${{ github.repository == 'flutter/engine' && (github.event_name == 'push' || github.event.label.name == 'vulnerability scan') }}
if: ${{ (github.repository == 'flutter/engine' && github.event_name == 'push') || github.event.label.name == 'vulnerability scan' }}
permissions:
# Needed to upload the SARIF results to code-scanning dashboard.
security-events: write
actions: read
contents: read
# Needed to access OIDC token.
id-token: write
steps:
- name: "Checkout code"
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
@ -32,19 +26,29 @@ jobs:
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c
with:
python-version: '3.7.7' # install the python version needed
- name: "extract and flatten deps"
- name: "extract, flatten, and convert to osv-scanner deps"
run: python ci/deps_parser.py
- name: "scan deps for vulnerabilities"
run: python ci/scan_flattened_deps.py
# Upload the results as artifacts.
- name: "Upload artifact"
uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392
- name: "evaluate git history for common ancestors"
run: python ci/scan_deps.py
- name: "upload osv-scanner deps"
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
with:
name: SARIF file
path: osvReport.sarif
retention-days: 5
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to security tab"
uses: github/codeql-action/upload-sarif@cdcdbb579706841c47f7063dda365e292e5cad7a
with:
sarif_file: osvReport.sarif
name: converted-osv-lockfile
path: converted-osv-lockfile.json
retention-days: 2
vuln-scan:
name: Vulnerability scanning
uses: "google/osv-scanner/.github/workflows/osv-scanner-reusable.yml@main"
with:
# Download the artifact uploaded in extract-deps step
download-artifact: converted-osv-lockfile
scan-args: |-
--lockfile=osv-scanner:converted-osv-lockfile.json
fail-on-vuln: false
# makes sure the osv-formatted vulns are uploaded
needs:
extract-deps
permissions:
# Needed to upload the SARIF results to code-scanning dashboard.
security-events: write
contents: read

View File

@ -4,13 +4,14 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Usage: deps_parser.py --deps <DEPS file> --output <flattened deps>
# Usage: deps_parser.py --deps <DEPS file> --output <lockfile>
#
# This script parses the DEPS file, extracts the fully qualified dependencies
# and writes them to a file. This file will later be used to validate the dependencies
# are pinned to a hash.
import argparse
import json
import os
import re
import sys
@ -19,7 +20,7 @@ SCRIPT_DIR = os.path.dirname(sys.argv[0])
CHECKOUT_ROOT = os.path.realpath(os.path.join(SCRIPT_DIR, '..'))
CHROMIUM_README_FILE = 'third_party/accessibility/README.md'
CHROMIUM_README_COMMIT_LINE = 4 # the fifth line will always contain the commit hash
CHROMIUM_README_COMMIT_LINE = 4 # The fifth line will always contain the commit hash.
CHROMIUM = 'https://chromium.googlesource.com/chromium/src'
@ -37,7 +38,7 @@ class VarImpl:
"""Implements the Var syntax."""
if var_name in self._local_scope.get('vars', {}):
return self._local_scope['vars'][var_name]
# Inject default values for env variables
# Inject default values for env variables.
if var_name in self._env_vars:
return self._env_vars[var_name]
raise Exception('Var is not defined: %s' % var_name)
@ -59,16 +60,26 @@ def parse_deps_file(deps_file):
# Extract the deps and filter.
deps = local_scope.get('deps', {})
filtered_deps = []
filtered_osv_deps = []
for _, dep in deps.items():
# We currently do not support packages or cipd which are represented
# as dictionaries.
if isinstance(dep, str):
filtered_deps.append(dep)
return filtered_deps
if not isinstance(dep, str):
continue
dep_split = dep.rsplit('@', 1)
filtered_osv_deps.append({
'package': {'name': dep_split[0], 'commit': dep_split[1]}
})
osv_result = {
'packageSource': {'path': deps_file, 'type': 'lockfile'},
'packages': filtered_osv_deps
}
return osv_result
def parse_readme(deps):
def parse_readme():
"""
Opens the Flutter Accessibility Library README and uses the commit hash
found in the README to check for vulnerabilities.
@ -76,24 +87,30 @@ def parse_readme(deps):
"""
file_path = os.path.join(CHECKOUT_ROOT, CHROMIUM_README_FILE)
with open(file_path) as file:
# read the content of the file opened
# Read the content of the file opened.
content = file.readlines()
commit_line = content[CHROMIUM_README_COMMIT_LINE]
commit = re.search(r'(?<=\[).*(?=\])', commit_line)
deps.append(CHROMIUM + '@' + commit.group())
return deps
osv_result = {
'packageSource': {'path': file_path, 'type': 'lockfile'},
'packages': [{'package': {'name': CHROMIUM, 'commit': commit.group()}}]
}
return osv_result
def write_manifest(deps, manifest_file):
print('\n'.join(sorted(deps)))
output = {'results': deps}
print(json.dumps(output, indent=2))
with open(manifest_file, 'w') as manifest:
manifest.write('\n'.join(sorted(deps)))
json.dump(output, manifest, indent=2)
def parse_args(args):
args = args[1:]
parser = argparse.ArgumentParser(
description='A script to flatten a gclient DEPS file.'
description='A script to extract DEPS into osv-scanner lockfile compatible format.'
)
parser.add_argument(
@ -107,8 +124,8 @@ def parse_args(args):
'--output',
'-o',
type=str,
help='Output flattened deps file.',
default=os.path.join(CHECKOUT_ROOT, 'deps_flatten.txt')
help='Output lockfile.',
default=os.path.join(CHECKOUT_ROOT, 'osv-lockfile.json')
)
return parser.parse_args(args)
@ -116,9 +133,9 @@ def parse_args(args):
def main(argv):
args = parse_args(argv)
deps = parse_deps_file(args.deps)
deps = parse_readme(deps)
write_manifest(deps, args.output)
deps_deps = parse_deps_file(args.deps)
readme_deps = parse_readme()
write_manifest([deps_deps, readme_deps], args.output)
return 0

View File

@ -0,0 +1,208 @@
#!/usr/bin/env python3
#
# Copyright 2013 The Flutter Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Usage: scan_deps.py --osv-lockfile <lockfile> --output <parsed lockfile>
#
# This script parses the dependencies provided in lockfile format for
# osv-scanner so that the common ancestor commits from the mirrored and
# upstream for each dependency are provided in the lockfile.
# It is expected that the osv-lockfile input is updated by this script
# and then uploaded using GitHub actions to be used by the osv-scanner
# reusable action.
import argparse
import json
import os
import shutil
import subprocess
import sys
from compatibility_helper import byte_str_decode
SCRIPT_DIR = os.path.dirname(sys.argv[0])
CHECKOUT_ROOT = os.path.realpath(os.path.join(SCRIPT_DIR, '..'))
# Temporary directory into which mirrored deps and their upstreams are cloned.
DEP_CLONE_DIR = CHECKOUT_ROOT + '/clone-test'
DEPS = os.path.join(CHECKOUT_ROOT, 'DEPS')
# Prefix of the DEPS 'vars' keys that map a mirrored dep to its upstream URL.
UPSTREAM_PREFIX = 'upstream_'

failed_deps = []  # Deps which fail to be cloned or git-merge based.
def parse_deps_file(lockfile, output_file):
  """Rewrites an osv-scanner lockfile using upstream ancestor commits.

  Reads the DEPS file to obtain the mirror -> upstream mapping (the 'vars'
  dict), then for each package in the input lockfile replaces the mirrored
  repo name and pinned commit with the upstream repo URL and the common
  ancestor commit found by get_common_ancestor(). Deps that cannot be
  matched to an upstream are recorded in failed_deps, reported, and left
  unmodified in the lockfile.

  Args:
    lockfile: path to the osv-scanner compatible lockfile produced by
      deps_parser.py.
    output_file: path the converted lockfile is written to.
  """
  deps_list = []
  with open(DEPS, 'r') as file:
    local_scope = {}
    global_scope = {'Var': lambda x: x}  # Dummy lambda.
    # Read the content.
    deps_content = file.read()
    # Eval the content. NOTE: DEPS is trusted, checked-in configuration.
    exec(deps_content, global_scope, local_scope)
    # Extract the deps and filter.
    deps_list = local_scope.get('vars')
  with open(lockfile, 'r') as file:
    data = json.load(file)
    results = data['results']
  if not os.path.exists(DEP_CLONE_DIR):
    os.mkdir(DEP_CLONE_DIR)  # Clone deps with upstream into temporary dir.
  # Extract commit hash, save in dictionary.
  for result in results:
    packages = result['packages']
    for package in packages:
      mirror_url = package['package']['name']
      commit = package['package']['commit']
      ancestor_result = get_common_ancestor([mirror_url, commit], deps_list)
      if ancestor_result:
        common_commit, upstream = ancestor_result
        package['package']['commit'] = common_commit
        package['package']['name'] = upstream
      else:
        # Record deps that could not be mapped to an upstream ancestor so
        # the scan log shows what was skipped.
        failed_deps.append(mirror_url)
  if failed_deps:
    print(
        'Dependencies that could not be parsed for ancestor commits: ' +
        ', '.join(failed_deps)
    )
  try:
    # Clean up cloned upstream dependency directory.
    shutil.rmtree(
        DEP_CLONE_DIR
    )  # Use shutil.rmtree since dir could be non-empty.
  except OSError as clone_dir_error:
    print(
        'Error cleaning up clone directory: %s : %s' %
        (DEP_CLONE_DIR, clone_dir_error.strerror)
    )
  # Write common ancestor commit data to new file to be
  # used in next github action step with osv-scanner.
  # The output_file name defaults to converted-osv-lockfile.json
  with open(output_file, 'w') as file:
    json.dump(data, file)
def get_common_ancestor(dep, deps_list):
  """Finds the common ancestor commit of a mirrored dep and its upstream.

  dep is a two-element list: dep[0] is the mirror repo URL, dep[1] is the
  mirror's pinned SHA. deps_list maps 'upstream_<name>' keys to upstream
  repo URLs (from the DEPS 'vars' dict).

  The mirror is cloned, an 'upstream' remote is added and fetched, a local
  branch tracking the upstream default branch is checked out, and
  git merge-base is run between that branch's HEAD and the pinned mirror SHA.

  Returns:
    An (ancestor_commit, upstream_url) tuple, or None when the dep has no
    upstream mapping or any git step fails.
  """
  # dep[0] contains the mirror repo.
  # dep[1] contains the mirror's pinned SHA.
  # upstream is the origin repo.
  dep_name = dep[0].split('/')[-1].split('.')[0]
  if UPSTREAM_PREFIX + dep_name not in deps_list:
    print('did not find dep: ' + dep_name)
    return None
  try:
    # Get the upstream URL from the mapping in DEPS file.
    upstream = deps_list.get(UPSTREAM_PREFIX + dep_name)
    temp_dep_dir = DEP_CLONE_DIR + '/' + dep_name
    # Clone dependency from mirror.
    subprocess.check_output(['git', 'clone', '--quiet', '--', dep[0], dep_name],
                            cwd=DEP_CLONE_DIR)
    # Create branch that will track the upstream dep.
    print(
        'attempting to add upstream remote from: {upstream}'.format(
            upstream=upstream
        )
    )
    subprocess.check_output(['git', 'remote', 'add', 'upstream', upstream],
                            cwd=temp_dep_dir)
    subprocess.check_output(['git', 'fetch', '--quiet', 'upstream'],
                            cwd=temp_dep_dir)
    # Get name of the default branch for upstream (e.g. main/master/etc.) by
    # parsing the 'HEAD branch: <name>' line of `git remote show` in Python
    # instead of piping through sed, so no shell is required.
    remote_show = subprocess.check_output(['git', 'remote', 'show', 'upstream'],
                                          cwd=temp_dep_dir)
    remote_show = byte_str_decode(remote_show)
    default_branch = ''
    for line in remote_show.splitlines():
      stripped = line.strip()
      if stripped.startswith('HEAD branch:'):
        default_branch = stripped.split(':', 1)[1].strip()
        break
    # Make upstream branch track the upstream dep.
    subprocess.check_output([
        'git', 'checkout', '--force', '-b', 'upstream', '--track',
        'upstream/' + default_branch
    ],
                            cwd=temp_dep_dir)
    # Get the most recent commit from default branch of upstream.
    commit = subprocess.check_output([
        'git', 'for-each-ref', '--format=%(objectname:short)',
        'refs/heads/upstream'
    ],
                                     cwd=temp_dep_dir)
    commit = byte_str_decode(commit)
    commit = commit.strip()
    # Perform merge-base on most recent default branch commit and pinned
    # mirror commit. List-form invocation avoids interpolating the pinned
    # SHA into a shell command line.
    ancestor_commit = subprocess.check_output(
        ['git', 'merge-base', commit, dep[1]], cwd=temp_dep_dir
    )
    ancestor_commit = byte_str_decode(ancestor_commit)
    ancestor_commit = ancestor_commit.strip()
    print('Ancestor commit: ' + ancestor_commit)
    return ancestor_commit, upstream
  except subprocess.CalledProcessError as error:
    print(
        "Subprocess command '{0}' failed with exit code: {1}.".format(
            error.cmd, str(error.returncode)
        )
    )
    if error.output:
      print("Subprocess error output: '{0}'".format(error.output))
    return None
def parse_args(args):
  """Parses command-line options, discarding the leading program name."""
  parser = argparse.ArgumentParser(
      description='A script to find common ancestor commit SHAs'
  )
  parser.add_argument(
      '--osv-lockfile',
      '-d',
      type=str,
      default=os.path.join(CHECKOUT_ROOT, 'osv-lockfile.json'),
      help='Input osv-scanner compatible lockfile of dependencies to parse.'
  )
  parser.add_argument(
      '--output',
      '-o',
      type=str,
      default=os.path.join(CHECKOUT_ROOT, 'converted-osv-lockfile.json'),
      help='Output osv-scanner compatible deps file.'
  )
  return parser.parse_args(args[1:])
def main(argv):
  """Entry point: converts the osv lockfile and writes the result."""
  options = parse_args(argv)
  parse_deps_file(options.osv_lockfile, options.output)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))

View File

@ -1,320 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2013 The Flutter Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Usage: scan_flattened_deps.py --flat-deps <flat DEPS file> --output <vulnerability report>
#
# This script parses the flattened, fully qualified dependencies,
# and uses the OSV API to check for known vulnerabilities
# for the given hash of the dependency
import argparse
import json
import os
import shutil
import subprocess
import sys
from urllib import request
from compatibility_helper import byte_str_decode
SCRIPT_DIR = os.path.dirname(sys.argv[0])
CHECKOUT_ROOT = os.path.realpath(os.path.join(SCRIPT_DIR, '..'))
# Temporary directory into which mirrored deps and their upstreams are cloned.
DEP_CLONE_DIR = CHECKOUT_ROOT + '/clone-test'
DEPS = os.path.join(CHECKOUT_ROOT, 'DEPS')
HELP_STR = 'To find complete information on this vulnerability, navigate to '
OSV_VULN_DB_URL = 'https://osv.dev/vulnerability/'
SECONDS_PER_YEAR = 31556952
# Prefix of the DEPS 'vars' keys that map a mirrored dep to its upstream URL.
UPSTREAM_PREFIX = 'upstream_'

failed_deps = []  # deps which fail to be cloned or git-merge based

# Skeleton of the SARIF 2.1.0 log emitted by this script; the 'rules' and
# 'results' lists are populated from OSV API findings in write_sarif().
sarif_log = {
    '$schema': 'https://json.schemastore.org/sarif-2.1.0.json',
    'version': '2.1.0',
    'runs': [{
        'tool': {
            'driver': {
                'name': 'OSV Scan',
                'informationUri': 'https://osv.dev/',
                'semanticVersion': '1.0.0',
                'rules': []
            }
        },
        'results': []
    }]
}
def sarif_result():
  """
  Returns the template for a result entry in the Sarif log,
  which is populated with CVE findings from OSV API
  """
  # Findings are attributed to the DEPS file itself; the region is a
  # placeholder since OSV results are not tied to a specific line.
  physical_location = {
      'artifactLocation': {'uri': 'DEPS'},
      'region': {'startLine': 1, 'startColumn': 1, 'endColumn': 1},
  }
  return {
      'ruleId': 'N/A',
      'message': {'text': 'OSV Scan Finding'},
      'locations': [{'physicalLocation': physical_location}],
  }
def sarif_rule():
  """
  Returns the template for a rule entry in the Sarif log,
  which is populated with CVE findings from OSV API
  """
  # 'id', 'shortDescription' and 'help' are placeholders that the caller
  # overwrites/extends with the concrete OSV vulnerability id.
  rule = {
      'id': 'OSV Scan',
      'name': 'OSV Scan Finding',
      'shortDescription': {'text': 'Insert OSV id'},
      'fullDescription': {
          'text': 'Vulnerability found by scanning against the OSV API'
      },
      'help': {
          'text':
              'More details in the OSV DB at: https://osv.dev/vulnerability/'
      },
      'defaultConfiguration': {'level': 'error'},
      'properties': {'tags': ['supply-chain', 'dependency']},
  }
  return rule
def parse_deps_file(deps_flat_file):
  """
  Takes input of fully qualified dependencies,
  for each dep find the common ancestor commit SHA
  from the upstream and query OSV API using that SHA
  If the commit cannot be found or the dep cannot be
  compared to an upstream, prints list of those deps
  """
  deps_list = []
  with open(DEPS, 'r') as file:
    local_scope = {}
    global_scope = {'Var': lambda x: x}  # dummy lambda
    # Read the content.
    deps_content = file.read()
    # Eval the content. NOTE: DEPS is trusted, checked-in configuration.
    exec(deps_content, global_scope, local_scope)
    # Extract the deps and filter.
    deps_list = local_scope.get('vars')
  queries = []  # list of queries to submit in bulk request to OSV API
  with open(deps_flat_file, 'r') as file:
    # One fully qualified dep per line, formatted as <repo-url>@<pinned-sha>.
    lines = file.readlines()
  headers = {
      'Content-Type': 'application/json',
  }
  osv_url = 'https://api.osv.dev/v1/querybatch'
  if not os.path.exists(DEP_CLONE_DIR):
    os.mkdir(DEP_CLONE_DIR)  #clone deps with upstream into temporary dir
  # Extract commit hash, save in dictionary
  for line in lines:
    dep = line.strip().split(
        '@'
    )  # separate fully qualified dep into name + pinned hash
    common_commit = get_common_ancestor_commit(dep, deps_list)
    if isinstance(common_commit, str):
      queries.append({'commit': common_commit})
    else:
      # get_common_ancestor_commit returns {} on failure.
      failed_deps.append(dep[0])
  print(
      'Dependencies that could not be parsed for ancestor commits: ' +
      ', '.join(failed_deps)
  )
  try:
    # clean up cloned upstream dependency directory
    shutil.rmtree(
        DEP_CLONE_DIR
    )  # use shutil.rmtree since dir could be non-empty
  except OSError as clone_dir_error:
    print(
        'Error cleaning up clone directory: %s : %s' %
        (DEP_CLONE_DIR, clone_dir_error.strerror)
    )
  # Query OSV API using common ancestor commit for each dep
  # return any vulnerabilities found.
  data = json.dumps({'queries': queries}).encode('utf-8')
  req = request.Request(osv_url, data, headers=headers)
  with request.urlopen(req) as resp:
    res_body = resp.read()
    results_json = json.loads(res_body.decode('utf-8'))
    if resp.status != 200:
      print('Request error')
    elif results_json['results'] == [{}]:
      # The API echoes one entry per query; all-empty means no findings.
      print('Found no vulnerabilities')
    else:
      results = results_json['results']
      # Drop the empty per-query entries, keeping only actual findings.
      filtered_results = list(filter(lambda vuln: vuln != {}, results))
      if len(filtered_results) > 0:
        print(
            'Found vulnerability on {vuln_count} dependenc(y/ies), adding to report'
            .format(vuln_count=str(len(filtered_results)))
        )
        print(*filtered_results)
        return filtered_results
  # Fallthrough for error / empty responses: report nothing found.
  print('Found no vulnerabilities')
  return {}
def get_common_ancestor_commit(dep, deps_list):
  """
  Given an input of a mirrored dep,
  compare to the mapping of deps to their upstream
  in DEPS and find a common ancestor
  commit SHA value.
  This is done by first cloning the mirrored dep,
  then a branch which tracks the upstream.
  From there, git merge-base operates using the HEAD
  commit SHA of the upstream branch and the pinned
  SHA value of the mirrored branch
  """
  # dep[0] contains the mirror repo
  # dep[1] contains the mirror's pinned SHA
  # upstream is the origin repo
  dep_name = dep[0].split('/')[-1].split('.')[0]
  # No 'upstream_<name>' mapping in DEPS vars means this dep cannot be
  # compared against an upstream; the caller treats {} as failure.
  if UPSTREAM_PREFIX + dep_name not in deps_list:
    print('did not find dep: ' + dep_name)
    return {}
  try:
    # get the upstream URL from the mapping in DEPS file
    upstream = deps_list.get(UPSTREAM_PREFIX + dep_name)
    temp_dep_dir = DEP_CLONE_DIR + '/' + dep_name
    # clone dependency from mirror
    subprocess.check_output(['git', 'clone', '--quiet', '--', dep[0], dep_name],
                            cwd=DEP_CLONE_DIR)
    # create branch that will track the upstream dep
    print(
        'attempting to add upstream remote from: {upstream}'.format(
            upstream=upstream
        )
    )
    subprocess.check_output(['git', 'remote', 'add', 'upstream', upstream],
                            cwd=temp_dep_dir)
    subprocess.check_output(['git', 'fetch', '--quiet', 'upstream'],
                            cwd=temp_dep_dir)
    # get name of the default branch for upstream (e.g. main/master/etc.)
    # by extracting the 'HEAD branch: <name>' line of `git remote show`.
    default_branch = subprocess.check_output(
        'git remote show upstream ' + "| sed -n \'/HEAD branch/s/.*: //p\'",
        cwd=temp_dep_dir,
        shell=True
    )
    default_branch = byte_str_decode(default_branch)
    default_branch = default_branch.strip()
    # make upstream branch track the upstream dep
    subprocess.check_output([
        'git', 'checkout', '--force', '-b', 'upstream', '--track',
        'upstream/' + default_branch
    ],
                            cwd=temp_dep_dir)
    # get the most recent commit from default branch of upstream
    commit = subprocess.check_output(
        'git for-each-ref ' +
        "--format=\'%(objectname:short)\' refs/heads/upstream",
        cwd=temp_dep_dir,
        shell=True
    )
    commit = byte_str_decode(commit)
    commit = commit.strip()
    # perform merge-base on most recent default branch commit and pinned mirror commit
    ancestor_commit = subprocess.check_output(
        'git merge-base {commit} {depUrl}'.format(commit=commit, depUrl=dep[1]),
        cwd=temp_dep_dir,
        shell=True
    )
    ancestor_commit = byte_str_decode(ancestor_commit)
    ancestor_commit = ancestor_commit.strip()
    print('Ancestor commit: ' + ancestor_commit)
    return ancestor_commit
  except subprocess.CalledProcessError as error:
    # Any failed git step lands here; report it and fall through to the
    # empty-dict failure value.
    print(
        "Subprocess command '{0}' failed with exit code: {1}.".format(
            error.cmd, str(error.returncode)
        )
    )
    if error.output:
      print("Subprocess error output: '{0}'".format(error.output))
    return {}
def write_sarif(responses, manifest_file):
  """
  Creates a full SARIF report based on the OSV API response which
  may contain several vulnerabilities
  Combines a rule with a result in order to construct the report
  """
  data = sarif_log
  driver = data['runs'][0]['tool']['driver']
  run_results = data['runs'][0]['results']
  for response in responses:
    for vuln in response['vulns']:
      # Each finding contributes a rule (metadata) plus a result (instance).
      driver['rules'].append(create_rule_entry(vuln))
      run_results.append(create_result_entry(vuln))
  with open(manifest_file, 'w') as out:
    json.dump(data, out)
def create_rule_entry(vuln):
  """
  Creates a Sarif rule entry from an OSV finding.
  Vuln object follows OSV Schema and is required to have 'id' and 'modified'
  """
  vuln_id = vuln['id']
  rule = sarif_rule()
  rule['id'] = vuln_id
  rule['shortDescription']['text'] = vuln_id
  # Append the id so the help text links to the specific OSV DB page.
  rule['help']['text'] = rule['help']['text'] + vuln_id
  return rule
def create_result_entry(vuln):
  """
  Creates a Sarif res entry from an OSV entry.
  Rule finding linked to the associated rule metadata via ruleId
  """
  entry = sarif_result()
  entry['ruleId'] = vuln['id']
  return entry
def parse_args(args):
  """Parses command-line options, discarding the leading program name."""
  parser = argparse.ArgumentParser(
      description='A script to scan a flattened DEPS file using OSV API.'
  )
  parser.add_argument(
      '--flat-deps',
      '-d',
      type=str,
      default=os.path.join(CHECKOUT_ROOT, 'deps_flatten.txt'),
      help='Input flattened DEPS file.'
  )
  parser.add_argument(
      '--output',
      '-o',
      type=str,
      default=os.path.join(CHECKOUT_ROOT, 'osvReport.sarif'),
      help='Output SARIF log of vulnerabilities found in OSV database.'
  )
  return parser.parse_args(args[1:])
def main(argv):
  """Entry point: scans the flattened deps and writes the SARIF report."""
  options = parse_args(argv)
  findings = parse_deps_file(options.flat_deps)
  write_sarif(findings, options.output)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))