-
Notifications
You must be signed in to change notification settings - Fork 85
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ndb: add package to read RPM ndb databases
Signed-off-by: Hank Donnay <hdonnay@redhat.com>
- Loading branch information
Showing
10 changed files
with
773 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
// Package ndb provides support for read-only access to an RPM "ndb" database. | ||
// | ||
// The support for ndb's native indexes is probably unneeded, but is implemented for completeness. | ||
package ndb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
package ndb | ||
|
||
import ( | ||
"bytes" | ||
"errors" | ||
"fmt" | ||
"io" | ||
) | ||
|
||
// Index is an index over an RPM tag.
//
// Parse fills in every field; Lookup reads them to resolve a key.
type Index struct {
	// slotSpace reads the slot section of the Index.
	slotSpace *io.SectionReader
	// keySpace reads the key section of the Index.
	keySpace *io.SectionReader
	// hMask is the mask for hash keys. Parse sets it to NSlots-1.
	hMask uint32

	// indexHeader holds the values decoded from the on-disk header.
	indexHeader
}
|
||
// indexHeader is the header for a tag index. It's meant to be embedded.
//
// The fields are declared in the order UnmarshalBinary decodes them.
type indexHeader struct {
	Generation uint32
	// NSlots is the slot count; Parse derives the hash mask and the size of
	// the slot section from it.
	NSlots     uint32
	UsedSlots  uint32
	DummySlots uint32
	// XMask selects the bits of a slot's first word that Lookup compares
	// against the key hash; the remaining bits are used as the key offset.
	XMask uint32
	// KeyEnd is used by Parse as the size of the key section.
	KeyEnd    uint32
	KeyExcess uint32
}
|
||
// UnmarshalBinary implements encoding.BinaryUnmarshaler for an Index header. | ||
func (i *indexHeader) UnmarshalBinary(b []byte) error { | ||
const ( | ||
magic = ('R' | 'p'<<8 | 'm'<<16 | 'I'<<24) | ||
version = 0 | ||
|
||
offsetMagic = 0 | ||
offsetVersion = 4 | ||
offsetGeneration = 8 | ||
offsetNSlots = 12 | ||
offsetUsedSlots = 16 | ||
offsetDummySlots = 20 | ||
offsetXMask = 24 | ||
offsetKeyEnd = 28 | ||
offsetKeyExcess = 32 | ||
offsetObsolete = 36 | ||
) | ||
if len(b) < 64 { | ||
return io.ErrShortBuffer | ||
} | ||
if le.Uint32(b[offsetMagic:]) != magic { | ||
return errors.New("ndb: index: bad magic") | ||
} | ||
if le.Uint32(b[offsetVersion:]) != version { | ||
return errors.New("ndb: index: bad version") | ||
} | ||
i.Generation = le.Uint32(b[offsetGeneration:]) | ||
i.NSlots = le.Uint32(b[offsetNSlots:]) | ||
i.UsedSlots = le.Uint32(b[offsetUsedSlots:]) | ||
i.DummySlots = le.Uint32(b[offsetDummySlots:]) | ||
i.XMask = le.Uint32(b[offsetXMask:]) | ||
i.KeyEnd = le.Uint32(b[offsetKeyEnd:]) | ||
i.KeyExcess = le.Uint32(b[offsetKeyExcess:]) | ||
// 4 bytes "obsolete" | ||
// 24 bytes padding | ||
return nil | ||
} | ||
|
||
// IndexPair is the package index and data offset.
//
// Lookup returns one IndexPair per matching slot.
type IndexPair struct {
	// Package is the package index decoded from a slot.
	Package uint32
	// Data is the data offset decoded from a slot.
	Data uint32
}
|
||
// Lookup returns the pair (if any) for the provided key. | ||
func (i *Index) Lookup(s string) (pg []IndexPair, err error) { | ||
// NOTE(hank) This is a pretty straight forward port of the C version. | ||
const ( | ||
slotSize = 8 | ||
skip = ^uint32(0) | ||
|
||
offsetKey = 0 | ||
offsetOffset = 4 | ||
) | ||
var keyoff, x uint32 | ||
keyh := murmur(s) | ||
b := make([]byte, slotSize) | ||
Look: | ||
for h, hh := keyh&i.hMask, uint32(7); ; h, hh = (h+hh)&i.hMask, hh+1 { | ||
off := int64(8 * h) | ||
if _, err := i.slotSpace.ReadAt(b, off); err != nil { | ||
return pg, fmt.Errorf("ndb: index: failed to read slot@0x%08x: %w", off, err) | ||
} | ||
x = le.Uint32(b) | ||
switch { | ||
case x == 0: | ||
break Look | ||
case x == skip: | ||
continue | ||
} | ||
if keyoff == 0 { | ||
switch { | ||
case ((x ^ keyh) & i.XMask) != 0: | ||
continue | ||
case !i.equalkey(x & ^i.XMask, s): | ||
continue | ||
} | ||
keyoff = x | ||
} | ||
if keyoff != x { | ||
continue | ||
} | ||
data := le.Uint32(b[offsetOffset:]) | ||
var ovldata uint32 | ||
// If flagged for overflow, read the overflow segment: | ||
if data&0x80000000 != 0 { | ||
off += 4 * int64(h) | ||
if _, err := i.slotSpace.ReadAt(b[:4], off); err != nil { | ||
return pg, fmt.Errorf("ndb: index: failed to read overflow slot@0x%08x: %w", off, err) | ||
} | ||
ovldata = le.Uint32(b) | ||
} | ||
pg = append(pg, i.decodeData(data, ovldata)) | ||
} | ||
return pg, nil | ||
} | ||
|
||
func (i *Index) equalkey(keyoff uint32, s string) bool { | ||
if int64(keyoff)+int64(len(s))+1 > i.keySpace.Size() { | ||
return false | ||
} | ||
l := len(s) | ||
var b []byte | ||
switch { | ||
case l < 255: | ||
b = make([]byte, 1+l) | ||
case l < 65535: | ||
b = make([]byte, 3+l) | ||
default: | ||
b = make([]byte, 7+l) | ||
} | ||
n, _ := i.keySpace.ReadAt(b, int64(keyoff)) | ||
b = b[:n] | ||
switch { | ||
case l < 255: | ||
if b[0] != uint8(l) { | ||
return false | ||
} | ||
b = b[1:] | ||
case l < 65535: | ||
if b[0] != 255 || le.Uint16(b[:1]) != uint16(l) { | ||
return false | ||
} | ||
b = b[3:] | ||
default: | ||
if b[0] != 255 || b[1] != 255 || b[2] != 255 || le.Uint32(b[3:]) != uint32(l) { | ||
return false | ||
} | ||
b = b[7:] | ||
} | ||
return bytes.Equal([]byte(s), b) | ||
} | ||
|
||
func (i *Index) decodeData(data, ovldata uint32) (t IndexPair) { | ||
switch { | ||
case (data & 0x80000000) != 0: | ||
t.Data = data ^ 0x80000000 | ||
t.Package = ovldata | ||
case (data & 0x40000000) != 0: | ||
t.Data = (data ^ 0x40000000) >> 24 | ||
t.Package = data & 0xffffff | ||
default: | ||
t.Data = data >> 20 | ||
t.Package = data & 0xfffff | ||
} | ||
return t | ||
} | ||
|
||
func (i *Index) encodeData(pkgIdx, datIdx uint32) (data, ovldata uint32) { | ||
switch { | ||
case (pkgIdx < 0x100000 && datIdx < 0x400): | ||
ovldata = 0 | ||
data = pkgIdx | datIdx<<20 | ||
case (pkgIdx < 0x1000000 && datIdx < 0x40): | ||
ovldata = 0 | ||
data = pkgIdx | datIdx<<24 | 0x40000000 | ||
default: | ||
ovldata = pkgIdx | ||
data = datIdx | 0x80000000 | ||
} | ||
return data, ovldata | ||
} | ||
|
||
// Parse closes over the provided [io.ReaderAt] and populates the provided Index. | ||
func (i *Index) Parse(r io.ReaderAt) error { | ||
const ( | ||
indexSlotOffset = 64 | ||
indexKeyChunksize = 4096 | ||
) | ||
b := make([]byte, indexSlotOffset) | ||
if _, err := r.ReadAt(b, 0); err != nil { | ||
return fmt.Errorf("ndb: index: unable to read bytes: %w", err) | ||
} | ||
if err := i.indexHeader.UnmarshalBinary(b); err != nil { | ||
return fmt.Errorf("ndb: index: unable to unmarshal header: %w", err) | ||
} | ||
|
||
i.hMask = i.NSlots - 1 | ||
i.slotSpace = io.NewSectionReader(r, indexSlotOffset, int64(i.NSlots)*12) | ||
i.keySpace = io.NewSectionReader(r, indexSlotOffset+(int64(i.NSlots)*12), int64(i.KeyEnd)) | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package ndb | ||
|
||
import ( | ||
"os" | ||
"testing" | ||
|
||
"github.com/quay/claircore/rpm/internal/rpm" | ||
) | ||
|
||
func TestLoadIndex(t *testing.T) { | ||
idxf, err := os.Open("testdata/Index.db") | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
defer idxf.Close() | ||
var xdb XDB | ||
if err := xdb.Parse(idxf); err != nil { | ||
t.Fatal(err) | ||
} | ||
idx, err := xdb.Index(rpm.TagName) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
p, err := idx.Lookup("filesystem") | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
t.Logf("%+#v", p) | ||
if p[0].Package != 3 { | ||
t.Fail() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package ndb | ||
|
||
// Murmur is a port of the rpm murmur hash, which uses a single constant
// rather than a few of them.
//
// The input is consumed as little-endian 32-bit words; any 1-3 trailing bytes
// are folded in together as one final partial word before the finishing mix.
func murmur(s string) (h uint32) {
	const m = 0x5bd1e995
	h = uint32(len(s) * m)

	// Whole 4-byte words.
	whole := len(s) &^ 3
	for i := 0; i < whole; i += 4 {
		w := uint32(s[i]) | uint32(s[i+1])<<8 | uint32(s[i+2])<<16 | uint32(s[i+3])<<24
		h = (h + w) * m
		h ^= h >> 16
	}

	// Trailing bytes, each added at its little-endian position.
	if tail := s[whole:]; len(tail) != 0 {
		for j := len(tail) - 1; j >= 0; j-- {
			h += uint32(tail[j]) << (8 * uint(j))
		}
		h *= m
		h ^= h >> 16
	}

	// Finishing mix.
	h *= m
	h ^= h >> 10
	h *= m
	h ^= h >> 17
	return h
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package ndb | ||
|
||
import "testing" | ||
|
||
func TestMurmur(t *testing.T) { | ||
x := "file-magic" | ||
t.Logf("%s\t%08x", x, murmur(x)) | ||
} |
Oops, something went wrong.