Skip to content

Commit

Permalink
Test ContainerIO.calculateSliceOffsetsAndSizes() and fix the slice si…
Browse files Browse the repository at this point in the history
…ze calculation

move ContainerIO.calculateSliceOffsetsAndSizes() to
- Container.populateSlicesAndIndexingParameters()
  • Loading branch information
jmthibault79 committed Mar 19, 2019
1 parent 7925166 commit cc5e617
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 26 deletions.
50 changes: 49 additions & 1 deletion src/main/java/htsjdk/samtools/cram/structure/Container.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public class Container {
private final ReferenceContext referenceContext;

// container header as defined in the specs, in addition to sequenceId from ReferenceContext

/**
* Byte size of the content excluding header.
*/
Expand All @@ -44,7 +45,16 @@ public class Container {

public long bases = 0;
public int blockCount = -1;

// Byte offsets at which each Slice begins within this container (the first falls immediately after the header). landmarks[i] is equal to Slice[i].offset.
// e.g. if landmarks[0] = 9000 and landmarks[1] = 109000, we know:
// the container's header size = 9000
// Slice[0].offset = 9000
// Slice[0].size = 109000 - 9000 = 100000
// Slice[1].offset = 109000

public int[] landmarks;

public int checksum = 0;

/**
Expand All @@ -59,7 +69,7 @@ public class Container {

// for indexing:
/**
* Container start in the stream.
* Container start in the stream, in bytes.
*/
public long offset;

Expand Down Expand Up @@ -136,6 +146,44 @@ else if (sliceRefContexts.size() > 1) {
return container;
}

/**
 * Assign this Container's slices, and populate those slices'
 * indexing parameters (containerOffset, index, offset and size) from this Container.
 *
 * Slice i takes landmarks[i] as its offset. Every slice except the last is sized as the
 * distance to the next landmark; the last slice is sized as the distance from its landmark
 * to the end of the container, computed here as landmarks[0] + containerByteSize.
 *
 * NOTE(review): this treats landmarks[0] as the container header size. The CRAM
 * specification appears to define landmarks relative to the end of the container
 * header - confirm which interpretation is intended before relying on the last size.
 *
 * @param slicesToPopulate the slices to populate, in the same order as this Container's landmarks
 * @throws IllegalArgumentException if there is at least one slice to populate but this
 *         Container has no landmarks, or fewer landmarks than slices
 */
void populateSlicesAndIndexingParameters(final ArrayList<Slice> slicesToPopulate) {
    final int sliceCount = slicesToPopulate.size();
    if (sliceCount == 0) {
        slices = new Slice[0];
        return;
    }

    // Validate up front so that a malformed header fails with a descriptive message
    // and before any slice (or this Container's slice array) has been modified.
    if (landmarks == null || landmarks.length < sliceCount) {
        throw new IllegalArgumentException(
                "Cannot populate " + sliceCount + " slice(s) from "
                        + (landmarks == null ? "null" : String.valueOf(landmarks.length))
                        + " landmark(s)");
    }

    // a "final landmark" indicating the byte offset of the end of the container,
    // equivalent to the container's total byte size
    final int containerHeaderSize = landmarks[0];
    final int containerTotalByteSize = containerHeaderSize + containerByteSize;

    final int lastSliceIndex = sliceCount - 1;
    final Slice[] populated = new Slice[sliceCount];
    for (int i = 0; i < sliceCount; i++) {
        final Slice slice = slicesToPopulate.get(i);

        // a slice ends where the next one begins; the last one ends with the container
        final int endOfSlice = (i < lastSliceIndex) ? landmarks[i + 1] : containerTotalByteSize;

        slice.containerOffset = offset;
        slice.index = i;
        slice.offset = landmarks[i];
        slice.size = endOfSlice - slice.offset;
        populated[i] = slice;
    }

    // publish only once every slice has been populated successfully
    slices = populated;
}

/**
* Retrieve the list of CRAI Index entries corresponding to this Container.
*
Expand Down
29 changes: 4 additions & 25 deletions src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.common.CramVersionPolicies;
import htsjdk.samtools.cram.common.Version;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.cram.structure.block.Block;
import htsjdk.samtools.cram.structure.block.BlockContentType;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;

import java.io.*;
Expand Down Expand Up @@ -78,38 +78,18 @@ private static Container readContainer(final int major, final InputStream inputS
throw new RuntimeIOException(e);
}

final List<Slice> slices = new ArrayList<Slice>();
final ArrayList<Slice> slices = new ArrayList<>();
for (int sliceCount = fromSlice; sliceCount < howManySlices - fromSlice; sliceCount++) {
final Slice slice = SliceIO.read(major, inputStream);
slice.index = sliceCount;
slices.add(slice);
slices.add(SliceIO.read(major, inputStream));
}

container.slices = slices.toArray(new Slice[slices.size()]);

calculateSliceOffsetsAndSizes(container);
container.populateSlicesAndIndexingParameters(slices);

log.debug("READ CONTAINER: " + container.toString());

return container;
}

/**
 * Fills in the indexing fields (offset, size, containerOffset, index) of every slice held
 * by the given container, using the container's landmarks, byte size and stream offset.
 * Does nothing when the container holds no slices.
 *
 * @param container the container whose slices are updated in place
 */
private static void calculateSliceOffsetsAndSizes(final Container container) {
    final int sliceTotal = container.slices.length;
    if (sliceTotal == 0) {
        return;
    }

    final int finalPosition = sliceTotal - 1;

    // every slice but the final one ends where the following landmark begins
    int position = 0;
    while (position < finalPosition) {
        final Slice current = container.slices[position];
        current.offset = container.landmarks[position];
        current.size = container.landmarks[position + 1] - current.offset;
        current.containerOffset = container.offset;
        current.index = position;
        position++;
    }

    // the final slice starts at the final landmark and is sized against containerByteSize
    final Slice tail = container.slices[finalPosition];
    tail.offset = container.landmarks[container.landmarks.length - 1];
    tail.size = container.containerByteSize - tail.offset;
    tail.containerOffset = container.offset;
    tail.index = finalPosition;
}

/**
* Writes a {@link Container} header information to a {@link OutputStream}.
*
Expand Down Expand Up @@ -173,7 +153,6 @@ public static int writeContainer(final Version version, final Container containe
container.landmarks[i] = landmarks.get(i);

container.containerByteSize = byteArrayOutputStream.size();
calculateSliceOffsetsAndSizes(container);

int length = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream);
try {
Expand Down
100 changes: 100 additions & 0 deletions src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -150,4 +151,103 @@ public void getSpansTest(final List<CramCompressionRecord> records,
Assert.assertEquals(spanMap.get(expectedReferenceContext), expectedAlignmentSpan);
}

// show that we can populate all of the slice indexing fields from the
// values in the container's header

// this is part of the deserialization process, and supports index creation

// single slice

@Test
public static void populateSlicesAndIndexingParametersOneSlice() {
    // layout under test: a 1234-byte container header followed by a single 6262-byte Slice,
    // in a container that begins 100,000 bytes into the CRAM stream
    final int headerBytes = 1234;
    final int onlySliceBytes = 6262;
    final int streamOffsetOfContainer = 100000;

    final Container oneSliceContainer =
            createOneSliceContainer(streamOffsetOfContainer, headerBytes, onlySliceBytes);

    // the lone slice has index 0, the requested size, and an offset equal to the header size
    assertSliceIndexingParams(
            oneSliceContainer.slices[0],
            0,
            streamOffsetOfContainer,
            onlySliceBytes,
            headerBytes);
}

// two slices

@Test
public static void populateSlicesAndIndexingParametersTwoSlices() {
    // layout under test: a 3234-byte container header, then a 7890-byte Slice,
    // then a 5555-byte Slice, in a container that begins 200,000 bytes into the CRAM stream
    final int headerBytes = 3234;
    final int firstSliceBytes = 7890;
    final int secondSliceBytes = 5555;
    final int streamOffsetOfContainer = 200000;

    final Container twoSliceContainer = createTwoSliceContainer(
            streamOffsetOfContainer, headerBytes, firstSliceBytes, secondSliceBytes);

    // the first slice begins at the end of the header
    assertSliceIndexingParams(
            twoSliceContainer.slices[0],
            0,
            streamOffsetOfContainer,
            firstSliceBytes,
            headerBytes);

    // the second slice begins where the first one ends
    assertSliceIndexingParams(
            twoSliceContainer.slices[1],
            1,
            streamOffsetOfContainer,
            secondSliceBytes,
            headerBytes + firstSliceBytes);
}

/**
 * Builds a Container holding a single Slice and runs
 * populateSlicesAndIndexingParameters() on it.
 *
 * @param containerStreamByteOffset value assigned to the Container's offset
 * @param containerHeaderSize value used as the Container's only landmark
 * @param slice0size value assigned to the Container's containerByteSize
 * @return the Container, with its slices populated
 */
private static Container createOneSliceContainer(final int containerStreamByteOffset,
                                                 final int containerHeaderSize,
                                                 final int slice0size) {
    final ReferenceContext refContext = new ReferenceContext(0);

    final Container container = new Container(refContext);
    container.offset = containerStreamByteOffset;
    container.containerByteSize = slice0size;
    container.landmarks = new int[]{
            containerHeaderSize, // beginning of slice 0
    };

    // a plain ArrayList: double-brace initialization would create an anonymous subclass
    final ArrayList<Slice> slices = new ArrayList<>();
    slices.add(new Slice(refContext));

    container.populateSlicesAndIndexingParameters(slices);
    return container;
}

/**
 * Builds a Container holding two Slices and runs
 * populateSlicesAndIndexingParameters() on it.
 *
 * @param containerStreamByteOffset value assigned to the Container's offset
 * @param containerHeaderSize value used as the first landmark
 * @param slice0size size of the first Slice; the second landmark is containerHeaderSize + slice0size
 * @param slice1size size of the second Slice
 * @return the Container, with its slices populated
 */
private static Container createTwoSliceContainer(final int containerStreamByteOffset,
                                                 final int containerHeaderSize,
                                                 final int slice0size,
                                                 final int slice1size) {
    // the container's data size is the combined size of its slices
    final int containerDataSize = slice0size + slice1size;

    final ReferenceContext refContext = new ReferenceContext(0);

    final Container container = new Container(refContext);
    container.offset = containerStreamByteOffset;
    container.containerByteSize = containerDataSize;
    container.landmarks = new int[]{
            containerHeaderSize,              // beginning of slice 0
            containerHeaderSize + slice0size  // beginning of slice 1
    };

    // a plain ArrayList: double-brace initialization would create an anonymous subclass
    final ArrayList<Slice> slices = new ArrayList<>();
    slices.add(new Slice(refContext));
    slices.add(new Slice(refContext));

    container.populateSlicesAndIndexingParameters(slices);
    return container;
}

/**
 * Asserts that a Slice's indexing fields hold the expected values. Each assertion
 * carries a message naming the field, so a failure identifies which one mismatched.
 *
 * @param slice the Slice to check
 * @param expectedIndex expected value of slice.index
 * @param expectedContainerOffset expected value of slice.containerOffset
 * @param expectedSize expected value of slice.size
 * @param expectedOffset expected value of slice.offset
 */
private static void assertSliceIndexingParams(final Slice slice,
                                              final int expectedIndex,
                                              final int expectedContainerOffset,
                                              final int expectedSize,
                                              final int expectedOffset) {
    Assert.assertEquals(slice.index, expectedIndex, "slice.index");
    Assert.assertEquals(slice.containerOffset, expectedContainerOffset, "slice.containerOffset");
    Assert.assertEquals(slice.size, expectedSize, "slice.size");
    Assert.assertEquals(slice.offset, expectedOffset, "slice.offset");
}
}

0 comments on commit cc5e617

Please sign in to comment.