diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b2f89f..4e4df3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Upgrade to wombat 3.8.11 (#256) +- Backport changes in wabac.js around JS rewriting rules (#259) + +### Fixed + +- JS rewriting abusively rewrites import function (#255) ## [5.1.1] - 2025-02-17 diff --git a/src/zimscraperlib/rewriting/js.py b/src/zimscraperlib/rewriting/js.py index fa1c664..2faf56c 100644 --- a/src/zimscraperlib/rewriting/js.py +++ b/src/zimscraperlib/rewriting/js.py @@ -154,13 +154,15 @@ def create_js_rules() -> list[TransformationRule]: return [ # rewriting `eval(...)` - invocation (re.compile(r"(?:^|\s)\beval\s*\("), replace_prefix_from(eval_str, "eval")), + (re.compile(r"\([\w]+,\s*eval\)\("), m2str(lambda _: f" {eval_str}")), # rewriting `x = eval` - no invocation (re.compile(r"[=]\s*\beval\b(?![(:.$])"), replace("eval", "self.eval")), + (re.compile(r"var\s+self"), replace("var", "let")), # rewriting `.postMessage` -> `__WB_pmw(self).postMessage` (re.compile(r"\.postMessage\b\("), add_prefix(".__WB_pmw(self)")), # rewriting `location = ` to custom expression `(...).href =` assignement ( - re.compile(r"[^$.]?\s?\blocation\b\s*[=]\s*(?![\s\d=])"), + re.compile(r"(?:^|[^$.+*/%^-])\s?\blocation\b\s*[=]\s*(?![\s\d=])"), add_suffix_non_prop(check_loc), ), # rewriting `return this` @@ -186,6 +188,7 @@ def create_js_rules() -> list[TransformationRule]: # As the rule will match first, it will prevent next rule matching `import` to # be apply to `async import`. 
(re.compile(r"async\s+import\s*\("), m2str(lambda x: x)), + (re.compile(r"[^$.]\bimport\s*\([^)]*\)\s*\{"), m2str(lambda x: x)), # esm dynamic import, if found, mark as module ( re.compile(r"[^$.]\bimport\s*\("), diff --git a/tests/rewriting/test_js_rewriting.py b/tests/rewriting/test_js_rewriting.py index 9b9b926..6ab7177 100644 --- a/tests/rewriting/test_js_rewriting.py +++ b/tests/rewriting/test_js_rewriting.py @@ -101,6 +101,40 @@ def test_js_rewrite_post_message(simple_js_rewriter: JsRewriter): ) +@pytest.mark.parametrize( + "raw_js,expected", + [ + pytest.param("x = eval; x(a);", "x = self.eval; x(a);", id="case1"), + pytest.param( + " eval(a)", + " WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; " + "return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); })" + ".eval(this, (function() { return arguments })(),a)", + id="case2", + ), + pytest.param( + "$eval = eval; $eval(a);", "$eval = self.eval; $eval(a);", id="case3" + ), + pytest.param( + "foo(a, eval(data));", + "foo(a, WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; " + "return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); })" + ".eval(this, (function() { return arguments })(),data));", + id="case4", + ), + pytest.param( + "return(1, eval)(data);", + "return WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; " + "return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); })" + ".eval(this, (function() { return arguments })(),data);", + id="case5", + ), + ], +) +def test_js_rewrite_evals(simple_js_rewriter: JsRewriter, raw_js: str, expected: str): + assert simple_js_rewriter.rewrite(raw_js) == expected + + class WrappedTestContent(ContentForTests): def __init__( @@ -173,11 +207,20 @@ def wrap_script(text: str) -> str: input_="this. location = 'http://example.com/'", expected="this. 
location = 'http://example.com/'", ), + WrappedTestContent( + input_="abc-location = http://example.com/", + expected="abc-location = http://example.com/", + ), + WrappedTestContent( + input_="func(location = 0)", + expected="func(location = 0)", + ), WrappedTestContent( input_="if (self.foo) { console.log('blah') }", expected="if (self.foo) { console.log('blah') }", ), WrappedTestContent(input_="window.x = 5", expected="window.x = 5"), + WrappedTestContent(input_=" var self ", expected=" let self "), ] ) def rewrite_wrapped_content(request: pytest.FixtureRequest): @@ -271,6 +314,12 @@ def wrap_import(text: str) -> str: import { Z } from "../../../path.js"; B = await import(somefile); + +class X { + import(a, b, c) { + await import (somefile); + } +} """, expected=""" import * from "../../../example.com/file.js" @@ -282,6 +331,12 @@ def wrap_import(text: str) -> str: import { Z } from "../../path.js"; B = await ____wb_rewrite_import__(import.meta.url, somefile); + +class X { + import(a, b, c) { + await ____wb_rewrite_import__ (import.meta.url, somefile); + } +} """, ), ImportTestContent( @@ -341,6 +396,7 @@ def test_import_rewrite(rewrite_import_content: ImportTestContent): "a.window.x = 5", " postMessage({'a': 'b'})", "simport(5);", + "import(e) {", "a.import(5);", "$import(5);", "async import(val) { ... }",