-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Avoid Storing Large Strings in Memory #194
Changes from 45 commits
ff2b110
0a1253a
f653e53
79cbbe7
c6e337c
b823e37
2757328
8caa0a5
a9bab89
f230288
597e248
61a9150
300688d
573c1f7
95cafc2
50a2259
62e1474
9e3bbe4
ef1a463
4f1aa87
972fa25
a4dc4d0
50f127d
1b0394b
a99bf47
2530189
73a2283
23113c7
f9c31f6
b2a159f
27ade6f
d8ffc64
8090664
aff20d0
1efe031
05c02b6
b65ed5f
9bbd08f
9ef506f
f71cee6
25ee568
c3ecaff
bbad767
aba79be
2a170c0
b8bdff0
c6956c2
67953ed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
package sjsonnet | ||
|
||
import java.io.{BufferedInputStream, File, FileInputStream} | ||
import java.nio.charset.StandardCharsets | ||
import java.nio.file.Files | ||
import java.util.zip.CRC32 | ||
import fastparse.ParserInput | ||
|
||
/** | ||
* A class that encapsulates a resolved import. This is used to cache the result of | ||
* resolving an import. If the import is deemed too large (IE it's a large file), then we will avoid keeping it in | ||
* memory and instead will re-read it from disk. | ||
*/ | ||
/**
 * Encapsulates a resolved import, acting as a caching layer on top of the file
 * system. Small files are read eagerly into memory; files larger than the
 * in-memory threshold are re-read from disk on every access so that large
 * strings are never retained on the heap.
 *
 * @param resolvedImportPath the path of the resolved import on disk
 * @param memoryLimitBytes   the maximum file size this cache will accept at all;
 *                           construction fails (via assert) for larger files
 */
class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long) extends ResolvedFile {

  private val jFile: File = resolvedImportPath.p.toIO

  assert(jFile.exists(), s"Resolved import path ${resolvedImportPath} does not exist")
  // Reject files larger than the caller-supplied hard limit up front.
  assert(
    jFile.length() <= memoryLimitBytes,
    s"Resolved import path ${resolvedImportPath} is too large: ${jFile.length()} bytes > ${memoryLimitBytes} bytes")

  // Files at or below this size are cached in memory; larger files are streamed
  // from disk on demand. 1 MiB, matching the original inline threshold.
  private[this] val inMemoryCacheThresholdBytes: Long = 1024L * 1024L

  /**
   * The cached file content, or [[None]] if the file is too large to keep in
   * memory. Using Option instead of a null sentinel makes the "not cached"
   * state explicit and type-checked.
   */
  private[this] val resolvedImportContent: Option[StaticResolvedFile] =
    if (jFile.length() > inMemoryCacheThresholdBytes) None
    else Some(StaticResolvedFile(readString(jFile)))

  /** Reads the entire file from disk as a UTF-8 string. */
  private[this] def readString(jFile: File): String =
    new String(Files.readAllBytes(jFile.toPath), StandardCharsets.UTF_8)

  /**
   * Returns a [[ParserInput]] for the resolved import. If the import was too
   * large to cache, the returned input streams the file from disk; otherwise
   * it reads from the in-memory copy.
   */
  def getParserInput(): ParserInput =
    resolvedImportContent.fold[ParserInput](FileParserInput(jFile))(_.getParserInput())

  /** Returns the file content, re-reading from disk when it is not cached in memory. */
  override def readString(): String =
    resolvedImportContent.fold(readString(jFile))(_.readString())

  /**
   * Computes a CRC32 checksum of the file by streaming it in 8 KiB chunks, so
   * hashing a large file never requires holding its full content in memory.
   */
  private def crcHashFile(file: File): Long = {
    val buffer = new Array[Byte](8192)
    val crc = new CRC32()
    val bis = new BufferedInputStream(new FileInputStream(file))
    try {
      var bytesRead = bis.read(buffer)
      while (bytesRead != -1) {
        crc.update(buffer, 0, bytesRead)
        bytesRead = bis.read(buffer)
      }
    } finally {
      // Closing the BufferedInputStream also closes the wrapped FileInputStream,
      // so a separate close of the underlying stream is unnecessary.
      bis.close()
    }
    crc.getValue()
  }

  /**
   * Content hash of the import, computed lazily on first access. For uncached
   * (large) files the hash is streamed from disk via [[crcHashFile]].
   */
  override lazy val contentHash: String =
    resolvedImportContent.fold(crcHashFile(jFile).toString)(_.contentHash)
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,8 +22,9 @@ object SjsonnetMain { | |
.find(os.exists) | ||
.flatMap(p => try Some(OsPath(p)) catch{case NonFatal(_) => None}) | ||
|
||
def read(path: Path): Option[String] = | ||
try Some(os.read(path.asInstanceOf[OsPath].p)) catch { case NonFatal(_) => None } | ||
def read(path: Path): Option[ResolvedFile] = { | ||
readPath(path) | ||
} | ||
} | ||
|
||
def main(args: Array[String]): Unit = { | ||
|
@@ -205,8 +206,9 @@ object SjsonnetMain { | |
case Some(i) => new Importer { | ||
def resolve(docBase: Path, importName: String): Option[Path] = | ||
i(docBase, importName).map(OsPath) | ||
def read(path: Path): Option[String] = | ||
try Some(os.read(path.asInstanceOf[OsPath].p)) catch { case NonFatal(_) => None } | ||
def read(path: Path): Option[ResolvedFile] = { | ||
readPath(path) | ||
} | ||
} | ||
case None => resolveImport(config.jpaths.map(os.Path(_, wd)).map(OsPath(_)), allowedInputs) | ||
}, | ||
|
@@ -291,4 +293,18 @@ object SjsonnetMain { | |
|
||
} | ||
} | ||
|
||
/**
 * Read a path into a [[ResolvedFile]] if it exists and is a file. A resolved file acts as a layer
 * of caching on top of the underlying file system. Small files are read into memory, while large
 * files are read from disk.
 *
 * The memory limit is `Int.MaxValue` bytes: a JVM array (and therefore a single String/byte
 * buffer) cannot exceed that size, so it is the largest file we could ever hold in memory.
 */
private[this] def readPath(path: Path): Option[ResolvedFile] = {
  // Cast once and reuse, rather than repeating asInstanceOf for each use.
  val osPath = path.asInstanceOf[OsPath]
  if (os.exists(osPath.p) && os.isFile(osPath.p)) {
    Some(new CachedResolvedFile(osPath, memoryLimitBytes = Int.MaxValue.toLong))
  } else {
    None
  }
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added these extra imports as they're now required to compile the given target. I also added the gatekeeper as another benchmark case.