Skip to content

Commit

Permalink
Sort matrix indices if needed before passing to SparseMatrixCSC const…
Browse files Browse the repository at this point in the history
…ructor
  • Loading branch information
rasmushenningsson committed Aug 27, 2024
1 parent 109692a commit 3ca592f
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed

* Fix bug when loading unusual .h5 and .mtx files whose matrix indices are not sorted.

## [0.2.1] - 2023-09-13

### Fixed
Expand Down
24 changes: 24 additions & 0 deletions src/fileio.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,35 @@ end
"""
    _matrix(::Type{T}, X, transpose::Bool) where T

Convert `X` to type `T`. When `transpose` is `true`, the adjoint `X'` is converted
instead of `X` itself.
"""
function _matrix(::Type{T}, X, transpose::Bool) where T
    oriented = transpose ? X' : X
    return convert(T, oriented)
end


"""
    _fix_sparse_buffers!(P, N, indptr, rowval, nzval)

Prepare raw compressed-column buffers for a `P`-row, `N`-column sparse matrix by making
sure the row indices within every column are sorted.

`indptr` must be 1-based: `indptr[1] == 1` and `indptr[end] == length(rowval) + 1`.
For each column `j` in `1:N` whose index range `indptr[j]:indptr[j+1]-1` is non-empty:

- if the row indices are not sorted, they are sorted in place and the same permutation
  is applied to the matching entries of `nzval`;
- the smallest row index must be `>= 1` and the largest must be `<= P`.

`rowval` and `nzval` are modified in place. An `AssertionError` is thrown when any of the
checks above fails. Returns `nothing`.
"""
function _fix_sparse_buffers!(P, N, indptr, rowval, nzval)
    @assert indptr[1] == 1
    @assert indptr[end] == length(rowval)+1

    for col in 1:N
        span = indptr[col]:(indptr[col+1] - 1)
        if !isempty(span)
            rowval_j = view(rowval, span)

            # Row indices are normally sorted already, but .h5 files (possibly written
            # by cellranger) have been seen in the wild where they are not. Left
            # unsorted, the resulting matrix data would be corrupt, so check every column.
            if !issorted(rowval_j)
                nzval_j = view(nzval, span)

                # `x[order]` allocates a fresh array, so copying it back into the view
                # never reads entries that were already overwritten.
                order = sortperm(rowval_j)
                copyto!(rowval_j, rowval_j[order])
                copyto!(nzval_j, nzval_j[order])
            end

            # The column is sorted at this point, so its first and last entries bound
            # every row index in it.
            @assert rowval_j[1] >= 1
            @assert rowval_j[end] <= P
        end
    end
    return nothing
end


function _read10x_matrix(io::HDF5.File, ::Type{Ti}, ::Type{Tv}) where {Ti,Tv}
P,N,_ = _read10x_matrix_metadata(io)
matrixGroup = HDF5.root(io)["matrix"]

indptr = read(matrixGroup["indptr"])
@assert length(indptr)==N+1
@assert issorted(indptr)
rowval = read(matrixGroup["indices"])
nzval = read(matrixGroup["data"])
@assert length(rowval)==length(nzval)
Expand All @@ -95,6 +117,8 @@ function _read10x_matrix(io::HDF5.File, ::Type{Ti}, ::Type{Tv}) where {Ti,Tv}
rowval .+= 1 # 0-based to 1-based
indptr .+= 1 # 0-based to 1-based

_fix_sparse_buffers!(P, N, indptr, rowval, nzval)

SparseMatrixCSC(P, N, indptr, rowval, nzval)
end

Expand Down

0 comments on commit 3ca592f

Please sign in to comment.