syft/syft/pkg/cataloger/r/parse_description.go
Alex Goodman 1aaa644007
Remove MetadataType from core package object and normalize JSON metadataType values (#1983)
* [wip]

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* distinct the package metadata functions

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* remove metadata type from package core model

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* incorporate review feedback for names

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add RPM archive metadata and split parser helpers

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* clarify the python package metadata type

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* rename the KB metadata type

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* break hackage and composer types by use case

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* linting fix

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix encoding and decoding for syft-json and cyclonedx

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* bump json schema to 11

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update cyclonedx-json snapshots

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update cyclonedx-xml snapshots

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update spdx-json snapshots

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update spdx-tv snapshots

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update syft-json snapshots

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* correct metadata type in stack yaml parser test

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix bom-ref redactor for cyclonedx-xml

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add tests for legacy package metadata names

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* regenerate json schema v11

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix legacy HackageMetadataType reflect type value check

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix linting

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* packagemetadata discovery should account for type shadowing

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix linting

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix cli tests

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* bump json schema version to v12

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update json schema to incorporate changes from main

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add syft-json legacy config option

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add tests around v11-v12 json decoding

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add docs for SYFT_JSON_LEGACY

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* rename structs to be compliant with new naming scheme

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2023-10-30 12:12:04 -04:00

148 lines
3.8 KiB
Go

package r
import (
"bufio"
"io"
"regexp"
"strings"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
/* some examples of license strings found in DESCRIPTION files:
find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'License:' | sort | uniq
License: GPL
License: GPL (>= 2)
License: GPL (>=2)
License: GPL(>=2)
License: GPL (>= 2) | file LICENCE
License: GPL-2 | GPL-3
License: GPL-3
License: LGPL (>= 2)
License: LGPL (>= 2.1)
License: MIT + file LICENSE
License: Part of R 4.3.0
License: Unlimited
*/
// parseDescriptionFile builds a package from the contents of an R DESCRIPTION file.
// The reader is split into key/value fields, the fields are mapped onto parseData,
// and the result is handed to newPackage together with the reader's location.
// The resolver and environment arguments are unused. Nothing (and no error) is
// returned when the resulting package lacks a name or a version.
func parseDescriptionFile(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	fields := extractFieldsFromDescriptionFile(reader)
	description := parseDataFromDescriptionMap(fields)

	rPackage := newPackage(description, reader.Location)
	if rPackage.Name != "" && rPackage.Version != "" {
		return []pkg.Package{rPackage}, nil, nil
	}

	// without both a name and a version there is nothing to report
	return nil, nil, nil
}
// parseData groups the values taken from a DESCRIPTION file: three fields kept
// as plain strings plus the embedded pkg.RDescription metadata.
type parseData struct {
	Package string // value of the "Package" field
	Version string // value of the "Version" field
	License string // value of the "License" field, kept as the raw string
	pkg.RDescription
}
// parseDataFromDescriptionMap converts the raw field map of a DESCRIPTION file into
// parseData. Fields missing from the map yield zero values. List-like fields (URL,
// Depends, Imports, Suggests) are split on commas, Description has its whitespace
// collapsed, and NeedsCompilation is converted to a bool.
func parseDataFromDescriptionMap(values map[string]string) parseData {
	metadata := pkg.RDescription{
		Title:            values["Title"],
		Description:      cleanMultiLineValue(values["Description"]),
		Author:           values["Author"],
		Maintainer:       values["Maintainer"],
		URL:              commaSeparatedList(values["URL"]),
		Repository:       values["Repository"],
		Built:            values["Built"],
		NeedsCompilation: yesNoToBool(values["NeedsCompilation"]),
		Imports:          commaSeparatedList(values["Imports"]),
		Depends:          commaSeparatedList(values["Depends"]),
		Suggests:         commaSeparatedList(values["Suggests"]),
	}

	return parseData{
		Package:      values["Package"],
		Version:      values["Version"],
		License:      values["License"],
		RDescription: metadata,
	}
}
// yesNoToBool reports whether s is "yes", compared case-insensitively. Every other
// value, including the empty string, is false.
//
// Only "yes" and "no" were observed for NeedsCompilation across the 105 DESCRIPTION
// files found under /usr/local/lib/R in a rocker/r-ver container after running
// `install2.r ggplot2 dplyr mlr3 caret`:
//
//	$ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'NeedsCompilation:' | sort | uniq
//	NeedsCompilation: no
//	NeedsCompilation: yes
func yesNoToBool(s string) bool {
	const affirmative = "yes"
	return strings.EqualFold(affirmative, s)
}
// commaSeparatedList splits s on commas and returns the pieces with surrounding
// whitespace removed. Pieces that are empty after trimming are dropped; when none
// remain the result is a nil slice.
func commaSeparatedList(s string) []string {
	var items []string
	for _, raw := range strings.Split(s, ",") {
		if item := strings.TrimSpace(raw); item != "" {
			items = append(items, item)
		}
	}
	return items
}
// space matches one or more consecutive whitespace characters.
var space = regexp.MustCompile(`\s+`)

// cleanMultiLineValue flattens a value that may span several lines by replacing
// every run of whitespace (newlines included) with a single space. Leading and
// trailing runs are reduced to one space as well, not removed.
func cleanMultiLineValue(s string) string {
	const single = " "
	return space.ReplaceAllLiteralString(s, single)
}
// extractFieldsFromDescriptionFile reads "Key: Value" fields from a DESCRIPTION
// file and returns them keyed by field name.
//
// A line is split at its first ':'; the text before it is the key and the trimmed
// text after it starts the value. A line beginning with a space or tab continues
// the value of the current field: it is trimmed and appended after a '\n'.
// Continuation lines that appear before any field are dropped. Empty lines and
// lines consisting only of spaces/tabs are skipped without closing the current
// field. A non-indented line with no ':' or with an empty key closes the current
// field and is otherwise ignored. When a key occurs more than once the last
// occurrence wins.
//
// The function has no error return, so a failure reported by the scanner is not
// surfaced; the fields parsed up to that point are returned.
func extractFieldsFromDescriptionFile(reader io.Reader) map[string]string {
	result := make(map[string]string)
	key := ""
	var valueFragment strings.Builder

	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		line := scanner.Text()

		// Blank lines carry no data. This includes lines made up solely of
		// spaces/tabs: treating those as continuations would append a stray
		// newline to the current value (e.g. turning a version into "1.0\n").
		if strings.Trim(line, " \t") == "" {
			continue
		}

		if startsWithWhitespace(line) {
			// line is like "\t\t continued value" -> append to the existing value
			if key == "" {
				continue
			}
			valueFragment.WriteByte('\n')
			valueFragment.WriteString(strings.TrimSpace(line))
			continue
		}

		// line is like "Key: Value" -> close out the previous value, start a new one
		if key != "" {
			result[key] = valueFragment.String()
			key = ""
			valueFragment.Reset()
		}

		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 || parts[0] == "" {
			// Not a usable field. Skipping an empty key here keeps its text out
			// of the builder, where it would otherwise be prepended to the value
			// of the next field.
			continue
		}

		key = parts[0]
		valueFragment.WriteString(strings.TrimSpace(parts[1]))
	}

	// flush the field that was still open when the input ended
	if key != "" {
		result[key] = valueFragment.String()
	}

	return result
}

// startsWithWhitespace reports whether s begins with a space or a tab. The empty
// string yields false.
func startsWithWhitespace(s string) bool {
	if s == "" {
		return false
	}
	return s[0] == ' ' || s[0] == '\t'
}