Skip to content

Commit b5eff78

Browse files
authored
chore: test bf16 fixes in CI (#3491)
1 parent a1f37ad commit b5eff78

File tree

8 files changed: +38 additions, -45 deletions (lines changed)

8 files changed: +38 additions, -45 deletions (lines changed)

.github/workflows/build-test-linux.yml

+2
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,7 @@ jobs:
138138
pushd .
139139
cd tests/py
140140
python -m pip install -r requirements.txt
141+
python -m pip install nvidia-modelopt[all] --extra-index-url https://pypi.nvidia.com
141142
cd dynamo
142143
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
143144
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py
@@ -172,6 +173,7 @@ jobs:
172173
pushd .
173174
cd tests/py
174175
python -m pip install -r requirements.txt
176+
python -m pip install nvidia-modelopt[all] --extra-index-url https://pypi.nvidia.com
175177
cd dynamo
176178
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
177179
popd

py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -893,7 +893,7 @@ def get_attr(self, target: str, args: Any, kwargs: Any) -> np.ndarray:
893893
else:
894894
constant_tensor = frozen_attr
895895

896-
return to_torch(constant_tensor)
896+
return to_torch(constant_tensor)
897897

898898
def call_method(self, target: str, args: Any, kwargs: Any) -> Any:
899899
assert isinstance(target, str)

py/torch_tensorrt/dynamo/conversion/converter_utils.py

+33 -30
Original file line number | Diff line number | Diff line change
@@ -590,42 +590,45 @@ def to_numpy(
590590
Returns:
591591
A Numpy array or None, if the input was None.
592592
"""
593-
output = None
593+
with unset_fake_temporarily():
594+
output = None
594595

595-
if value is None or isinstance(value, np.ndarray):
596-
output = value
596+
if value is None or isinstance(value, np.ndarray):
597+
output = value
597598

598-
elif isinstance(value, torch.Tensor):
599-
if value.is_quantized:
600-
value = value.dequantize()
601-
elif value.dtype == torch.bfloat16:
602-
# TODO: Remove when numpy has a BF16 type
603-
_LOGGER.warning(
604-
"Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
605-
)
606-
value = value.to(torch.float)
607-
608-
output = value.cpu().detach().contiguous().numpy()
599+
elif isinstance(value, torch.Tensor):
600+
if value.is_quantized:
601+
value = value.dequantize()
602+
elif value.dtype == torch.bfloat16:
603+
# TODO: Remove when numpy has a BF16 type
604+
_LOGGER.warning(
605+
"Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
606+
)
607+
value = value.to(torch.float)
609608

610-
elif isinstance(value, int):
611-
output = np.array([value], dtype=np.int32)
609+
output = value.cpu().detach().contiguous().numpy()
612610

613-
elif isinstance(value, float):
614-
output = np.array([value], dtype=np.float32)
611+
elif isinstance(value, int):
612+
output = np.array([value], dtype=np.int32)
615613

616-
elif isinstance(value, bool):
617-
output = np.array([value], dtype=np.bool_)
614+
elif isinstance(value, float):
615+
output = np.array([value], dtype=np.float32)
618616

619-
if isinstance(output, np.ndarray) or output is None:
620-
return (
621-
output
622-
if (dtype is None or output is None)
623-
else output.astype(_enums.dtype._from(dtype).to(np.dtype, use_default=True))
624-
)
625-
else:
626-
raise AssertionError(
627-
f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
628-
)
617+
elif isinstance(value, bool):
618+
output = np.array([value], dtype=np.bool_)
619+
620+
if isinstance(output, np.ndarray) or output is None:
621+
return (
622+
output
623+
if (dtype is None or output is None)
624+
else output.astype(
625+
_enums.dtype._from(dtype).to(np.dtype, use_default=True)
626+
)
627+
)
628+
else:
629+
raise AssertionError(
630+
f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
631+
)
629632

630633

631634
def to_torch(

tests/py/dynamo/backend/test_backend_compiler.py

+1 -7
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,10 @@
22
from copy import deepcopy
33

44
import torch
5+
import torch_tensorrt
56
from torch.testing._internal.common_utils import TestCase, run_tests
67
from torch_tensorrt.dynamo.partitioning import fast_partition
78

8-
import torch_tensorrt
9-
109
from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing
1110

1211

@@ -51,7 +50,6 @@ def forward(self, x, y):
5150
pass_through_build_failures=True,
5251
torch_executed_ops={"torch.ops.aten.add.Tensor"},
5352
use_python_runtime=False,
54-
debug=True,
5553
)
5654
optimized_model_results = optimized_model(*inputs).detach().cpu()
5755
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -132,7 +130,6 @@ def forward(self, x, y):
132130
pass_through_build_failures=True,
133131
torch_executed_ops={"torch.ops.aten.add.Tensor"},
134132
use_python_runtime=False,
135-
debug=True,
136133
)
137134
optimized_model_results = optimized_model(*inputs).detach().cpu()
138135
torch_model_results = model(*inputs).detach().cpu()
@@ -177,7 +174,6 @@ def forward(self, x, y):
177174
optimization_level=4,
178175
version_compatible=True,
179176
max_aux_streams=5,
180-
debug=True,
181177
)
182178
optimized_model_results = optimized_model(*inputs).detach().cpu()
183179
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -225,7 +221,6 @@ def forward(self, x, y):
225221
min_block_size=1,
226222
pass_through_build_failures=True,
227223
truncate_double=True,
228-
debug=True,
229224
)
230225
optimized_model_results = optimized_model(*inputs).detach().cpu()
231226
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -298,7 +293,6 @@ def forward(self, x, y):
298293
min_block_size=1,
299294
pass_through_build_failures=True,
300295
truncate_double=False,
301-
debug=True,
302296
torch_executed_ops={"torch.ops.aten.add.Tensor"},
303297
)
304298
optimized_model_results = optimized_model(*inputs).detach().cpu()

tests/py/dynamo/conversion/harness.py

-2
Original file line number | Diff line number | Diff line change
@@ -415,7 +415,6 @@ def run_test(
415415
compilation_settings = CompilationSettings(
416416
enabled_precisions={dtype._from(precision)},
417417
truncate_double=True,
418-
debug=True,
419418
immutable_weights=immutable_weights,
420419
)
421420

@@ -507,7 +506,6 @@ def run_test_compare_tensor_attributes_only(
507506
compilation_settings = CompilationSettings(
508507
enabled_precisions={dtype._from(precision)},
509508
truncate_double=True,
510-
debug=True,
511509
immutable_weights=immutable_weights,
512510
)
513511

tests/py/dynamo/models/test_dtype_support.py

-1
Original file line number | Diff line number | Diff line change
@@ -297,7 +297,6 @@ def forward(self, x):
297297
ir="torch_compile",
298298
inputs=inputs,
299299
enabled_precisions={torch.bfloat16},
300-
debug=True,
301300
min_block_size=1,
302301
device=device,
303302
cache_built_engines=False,

tests/py/dynamo/models/test_models_export.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,6 @@ def calibrate_loop(model):
254254

255255
@unittest.skipIf(
256256
platform.system() != "Linux"
257-
or torch.cuda.get_device_capability() < (8, 9)
258257
or not importlib.util.find_spec("modelopt")
259258
or Version(metadata.version("nvidia-modelopt")) < Version("0.17.0"),
260259
"modelopt 0.17.0 or later is required, Int8 quantization is supported in modelopt since 0.17.0 or later for linux",
@@ -290,7 +289,7 @@ def calibrate_loop(model):
290289

291290
with torch.no_grad():
292291
with export_torch_mode():
293-
exp_program = torch.export.export(model, (input_tensor,))
292+
exp_program = torch.export.export(model, (input_tensor,), strict=False)
294293
trt_model = torchtrt.dynamo.compile(
295294
exp_program,
296295
inputs=[input_tensor],

tests/py/requirements.txt

-2
Original file line number | Diff line number | Diff line change
@@ -10,5 +10,3 @@ pyyaml
1010
timm>=1.0.3
1111
flashinfer-python; python_version < "3.13"
1212
transformers==4.49.0
13-
nvidia-modelopt[deploy,hf,torch]~=0.17.0; python_version < "3.13"
14-
--extra-index-url https://pypi.nvidia.com

0 commit comments

Comments
 (0)