Skip to content

Commit

Permalink
feat(transform): add support for passing RegExp literals through cons…
Browse files Browse the repository at this point in the history
…tructor
  • Loading branch information
michaelfig committed Feb 11, 2020
1 parent d74ec62 commit 5c9e1e7
Show file tree
Hide file tree
Showing 17 changed files with 348 additions and 113 deletions.
19 changes: 18 additions & 1 deletion packages/transform-metering/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,31 @@ Every loop body (including single-statement bodies), and `catch` and `finally` b
}
```

### RegExp literals

All regular expression literals (such as `/some-regexp/g`) are rewritten as follows:

```js
const $h_re_1 = RegExp('some-regexp', 'g'); // declaration emitted ahead of the rewritten code
... // existing use of /some-regexp/g replaced by $h_re_1
```

Any identifier that begins with the `$h_re_` prefix (such as `$h_re_1`) is reserved for the transform: evaluated code that mentions one is rejected.

This makes it possible for a wrapped `RegExp` constructor (see [Host endowments](#host-endowments)) to prevent ["catastrophic backtracking"](https://www.regular-expressions.info/catastrophic.html).

## Host endowments

Rather than precisely instrumenting the host platform code, this package provides an option to wrap a global object with code that roughly instruments function calls. This is not a Proxy/Membrane, but a complete reconstruction of the globals and endowments as an object that can be supplied to the `endowments` parameter of the three-argument evaluator.

The reason for this wrapping is to provide some basic accounting for the resources consumed by the host platform methods. The wrapping makes some assumptions about the host, such as:

1. only flat objects are allocated by builtins, and they are returned
1. only flat objects are allocated by builtins, and they are returned by the builtin
2. builtins that are costly in time or space complexity either have large return values or make many calls to a supplied function argument
3. builtins have been "tamed" by the SES platform to prevent nondeterminism and pathological behaviour

This at least prevents user code from running after a builtin has exceeded a meter.

### Special treatment

The global object wrapper specially implements `RegExp` using [RE2](https://github.com/google/re2/#readme), which entirely prevents ["catastrophic backtracking"](https://www.regular-expressions.info/catastrophic.html). However, in order to do this, the wrapped object supports neither lookahead nor backreferences.
3 changes: 2 additions & 1 deletion packages/transform-metering/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
},
"dependencies": {
"@agoric/harden": "^0.0.4",
"@agoric/nat": "^2.0.1"
"@agoric/nat": "^2.0.1",
"re2": "^1.10.5"
},
"keywords": [],
"files": [
Expand Down
1 change: 1 addition & 0 deletions packages/transform-metering/src/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export const METER_LEAVE = 'l';
export const METER_COMBINED = '*';

export const DEFAULT_METER_ID = '$h\u200d_meter';
export const DEFAULT_REGEXP_ID_PREFIX = '$h\u200d_re_';

// Default metering values. These can easily be overridden in meter.js.
// true means to use the combined meter.
Expand Down
6 changes: 6 additions & 0 deletions packages/transform-metering/src/endow.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import harden from '@agoric/harden';
import RE2 from 're2';

import * as c from './constants';

Expand All @@ -20,6 +21,11 @@ export function makeMeteringEndowments(
));

function wrap(target) {
if (target === globalsToShadow.RegExp) {
// Replace the RegExp object with RE2.
target = RE2;
}

if (Object(target) !== target) {
return target;
}
Expand Down
33 changes: 25 additions & 8 deletions packages/transform-metering/src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@ export function makeMeteringTransformer(
babelCore,
overrideParser = undefined,
overrideMeterId = c.DEFAULT_METER_ID,
overrideRegexpIdPrefix = c.DEFAULT_REGEXP_ID_PREFIX,
) {
const parser = overrideParser
? overrideParser.parse || overrideParser
: babelCore.parseSync;
const meterId = overrideMeterId;
const regexpIdPrefix = overrideRegexpIdPrefix;
let regexpNumber = 0;

const meteringPlugin = ({ types: t }) => {
const meteringPlugin = regexpList => ({ types: t }) => {
// Call [[meterId]][idString](...args)
const meterCall = (idString, args = []) => {
const mid = t.Identifier(meterId);
Expand Down Expand Up @@ -67,12 +70,23 @@ export function makeMeteringTransformer(
const visitor = {
// Ensure meter identifiers are generated by us, or abort.
Identifier(path) {
if (path.node.name === meterId && !path.node[METER_GENERATED]) {
if ((path.node.name === meterId || path.node.name.startsWith(regexpIdPrefix))
&& !path.node[METER_GENERATED]) {
throw path.buildCodeFrameError(
`Identifier ${path.node.name} is reserved for metering code`,
);
}
},
RegExpLiteral(path) {
const { pattern, flags } = path.node;
const reid = `${regexpIdPrefix}${regexpNumber}`;
regexpNumber += 1;
regexpList.push(`\
const ${reid}=RegExp(${JSON.stringify(pattern)},${JSON.stringify(flags)});`);
const reNode = t.identifier(reid);
reNode[METER_GENERATED] = true;
path.replaceWith(reNode);
},
// Loop constructs need only a compute meter.
DoWhileStatement(path) {
path.node.body = wrapWithComputeMeter(blockify(path.node.body));
Expand Down Expand Up @@ -124,25 +138,28 @@ export function makeMeteringTransformer(

// Do the actual transform.
const ast = parser(source);
const regexpList = [];
const output = babelCore.transformFromAstSync(ast, source, {
generatorOpts: {
retainLines: true,
},
plugins: [meteringPlugin],
plugins: [meteringPlugin(regexpList)],
ast: true,
code: true,
});

// Meter by the source length.
const meterSource = `\
${meterId}.${c.METER_COMPUTE}(${output.code.length});`;
// Meter by the regular expressions in use.
const reSource = regexpList.join('');

// Force into an IIFE, if necessary.
const maybeSource = output.code;
const actualSource =
ss.sourceType === 'expression'
? `(function(){${meterSource}return ${maybeSource}})()`
: `${meterSource}${maybeSource}`;
? `(function(){${reSource}return ${maybeSource}})()`
: `${reSource}${maybeSource}`;

// Meter how much source code they created.
endowments[meterId][c.METER_COMPUTE](actualSource.length);

return {
...ss,
Expand Down
9 changes: 9 additions & 0 deletions packages/transform-metering/test/test-endow.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ test('meter running', async t => {
RangeError,
'bound function exhausted',
);

resetters.combined(50);
const re = e.RegExp('^ab*c');
t.equals(re.test('abbbc'), true, 'regexp test works');
t.equals(re.test('aac'), false, 'regexp test fails');
t.equals(!'aac'.match(re), true, 'string match works');
t.equals(!!'abbbc'.match(re), true, 'string match fails');
t.throws(() => e.RegExp('(foo)\\1'), SyntaxError, 'backreferences throw');
t.throws(() => e.RegExp('abc(?=def)'), SyntaxError, 'lookahead throws');
} catch (e) {
t.isNot(e, e, 'unexpected exception');
} finally {
Expand Down
23 changes: 17 additions & 6 deletions packages/transform-metering/test/test-transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,33 @@ import * as babelCore from '@babel/core';
import fs from 'fs';

import { makeMeteringTransformer } from '../src/index';
import * as c from '../src/constants';

test('meter transform', async t => {
try {
const { meterId, meteringTransform } = makeMeteringTransformer(
babelCore,
undefined,
'$m',
'$re_',
);
const rewrite = source =>
meteringTransform.rewrite({
const rewrite = (source, testName) => {
let cMeter;
const ss = meteringTransform.rewrite({
source,
endowments: {},
endowments: {
[meterId]: {
[c.METER_COMPUTE]: units => cMeter = units,
},
},
sourceType: 'script',
}).source;
});
t.equals(cMeter, ss.source.length, `compute meter updated ${testName}`);
return ss.source;
};

t.throws(
() => rewrite(`${meterId}.l()`),
() => rewrite(`${meterId}.l()`, 'blacklisted meterId'),
SyntaxError,
'meterId cannot appear in source',
);
Expand All @@ -36,7 +47,7 @@ test('meter transform', async t => {
'utf8',
);
t.equals(
rewrite(src.trimRight()),
rewrite(src.trimRight(), testDir),
rewritten.trimRight(),
`rewrite ${testDir}`,
);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
$m.c(49);() => {try {$m.e();
() => {try {$m.e();
f();} finally {$m.l();}};
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
$m.c(60);() => {try {$m.e();return (
() => {try {$m.e();return (
f());} finally {$m.l();}};
2 changes: 1 addition & 1 deletion packages/transform-metering/testdata/classes/rewrite.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
$m.c(78);class Abc {
class Abc {
f() {try {$m.e();
return doit();
} finally {$m.l();}}}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
$m.c(67);a = {
a = {
f() {try {$m.e();
doit();
} finally {$m.l();}} };
2 changes: 1 addition & 1 deletion packages/transform-metering/testdata/for-loops/rewrite.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
$m.c(132);for (const f of b) {$m.c();
for (const f of b) {$m.c();
doit(f);}

for (const p in bar) {$m.c();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
$m.c(60);(function () {try {$m.e();
(function () {try {$m.e();
f();
} finally {$m.l();}});
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
const $re_0=RegExp("^my-favourite-regexp","");if ($re_0.test('myf')) {
doit();
}
3 changes: 3 additions & 0 deletions packages/transform-metering/testdata/regexp-literal/source.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
if (/^my-favourite-regexp/.test('myf')) {
doit();
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
$m.c(115);while (a) {$m.c();}
while (a) {$m.c();}

while (a) {$m.c();doit();}

Expand Down
Loading

0 comments on commit 5c9e1e7

Please sign in to comment.