Commit f69f38e

add docstring and fix typo

Signed-off-by: Xin He <xinhe3@habana.ai>
1 parent: d198b96

4 files changed (+14, -3 lines)

neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/xpu/xpu_quantized_func_wrapper.py

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ def get_default_quantized_func(self):
     def __call__(self, input, other, out=None, out_dtype=torch.bfloat16, scale_input_inv=None, scale_other_inv=None):
         # TODO FSW-11669 modify call arguments once fp8_gemm_v2 is implemented
         # Current ipex ops fp8_gemm API is misaligned to hpu ops API.
-        # below args are according to ipex ops to allow basic unit testing, but won't support intergartion in
+        # below args are according to ipex ops to allow basic unit testing, but won't support integration in
         # INC patched modules.
         return self._quantized_func_(input,
                                      torch.bfloat16,
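
For reference, the ipex-style argument order mentioned in the comment above can be exercised on its own. This is an illustrative sketch only: FakeQuantizedMatmul is a hypothetical stand-in that mirrors the wrapper's __call__ signature and falls back to a plain matmul; it is not the real _quantized_func_ or the ipex fp8_gemm op.

import torch

# Hypothetical stand-in mirroring the ipex-style signature shown in the diff.
class FakeQuantizedMatmul:
    def __call__(self, input, other, out=None, out_dtype=torch.bfloat16,
                 scale_input_inv=None, scale_other_inv=None):
        # Apply the inverse scales (dequantize), run a plain matmul, cast to out_dtype.
        a = input.float() * (scale_input_inv if scale_input_inv is not None else 1.0)
        b = other.float() * (scale_other_inv if scale_other_inv is not None else 1.0)
        return torch.matmul(a, b).to(out_dtype)

x = torch.randn(4, 8)
w = torch.randn(8, 16)
out = FakeQuantizedMatmul()(x, w, out_dtype=torch.bfloat16,
                            scale_input_inv=torch.tensor(2.0),
                            scale_other_inv=torch.tensor(0.5))
print(out.shape, out.dtype)  # torch.Size([4, 16]) torch.bfloat16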

neural_compressor/torch/algorithms/layer_wise/utils.py

Lines changed: 7 additions & 0 deletions
@@ -256,6 +256,12 @@ def load_module(model, module_name, path, device="cpu"):
         set_module_tensor_to_device(model, param_name, device, value)

 def load_first_layer_only(user_model, model_name):
+    """load first layer only.
+
+    Args:
+        user_model (torch.nn.Module): input model
+        model_name (str): model name or path
+    """
     for name, m in user_model.named_modules():
         if ('layers' not in name or 'layers.0' in name) and len(name) > 0 and len(list(m.named_children())) == 0:
             load_module(user_model, name, get_path(model_name), device="hpu" if is_hpex_available() else "cpu")

@@ -331,6 +337,7 @@ def hook(module, input, output):


 def clean_module_weight(module):
+    """Clean module weight."""
     hpu_available = is_hpex_available()
     """Clean module weight."""
     if isinstance(module, QDQLayer):
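
The name filter in load_first_layer_only keeps leaf modules that sit outside the decoder layer stack (embeddings, final norm, lm_head) plus those inside layers.0. A small self-contained check of that string predicate, using hypothetical module names (the real function additionally requires the module to be a leaf, i.e. to have no named children):

# Hypothetical module names; only the string predicate from load_first_layer_only is checked.
names = [
    "model.embed_tokens",               # outside the layer stack -> loaded
    "model.layers.0.self_attn.q_proj",  # first decoder layer     -> loaded
    "model.layers.1.self_attn.q_proj",  # later decoder layer     -> skipped
    "model.norm",                       # outside the layer stack -> loaded
]
kept = [n for n in names if ("layers" not in n or "layers.0" in n) and len(n) > 0]
print(kept)  # ['model.embed_tokens', 'model.layers.0.self_attn.q_proj', 'model.norm']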

neural_compressor/torch/algorithms/weight_only/gptq.py

Lines changed: 3 additions & 1 deletion
@@ -224,7 +224,8 @@ def __init__(
             use_max_length (bool): set all sequence length to be same length.
             max_seq_length (int): the same length of all sequence length.
             dataloader: an iterable containing calibration datasets, contains (inputs, targets)
-            use_layer_wise (bool): Enables quantize model per layer. Defaults to False.
+            use_layer_wise (bool): Whether to load weights and quantize per layer. Defaults to False.
+            use_block_wise (bool): Whether to load weights and quantize per block. Defaults to False.
             model_path (str): Model path that is used to load state_dict per layer.
             quant_lm_head (bool): Indicates whether quantize the lm_head layer in transformers. Defaults to False.
             device (str): cpu or cuda.

@@ -288,6 +289,7 @@ def prepare_layer_wise(self, model_path, indicated_layers=None, layerwise=True):
                 If None, all layers will be considered.
                 Layers not specified in this list will be retained in memory
                 but will not undergo quantization.
+            layerwise (bool): Whether to apply layer-wise quantization.
         """
         import os
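
The new use_block_wise option documents a coarser granularity than use_layer_wise. A toy illustration of the difference in what gets visited (not INC's actual traversal code), assuming a model made of two blocks that each contain two Linear layers:

import torch

# Toy two-block "model", for illustration only.
class Block(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(8, 8)
        self.fc2 = torch.nn.Linear(8, 8)

model = torch.nn.ModuleList([Block() for _ in range(2)])

# layer-wise granularity: every leaf Linear is loaded and quantized on its own
per_layer = [name for name, mod in model.named_modules() if isinstance(mod, torch.nn.Linear)]
# block-wise granularity: one whole block is handled at a time
per_block = [name for name, mod in model.named_children()]

print(per_layer)  # ['0.fc1', '0.fc2', '1.fc1', '1.fc2']
print(per_block)  # ['0', '1']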

neural_compressor/torch/algorithms/weight_only/modules.py

Lines changed: 3 additions & 1 deletion
@@ -29,8 +29,9 @@
 from .utility import quant_tensor

 class Matmul(torch.nn.Module):
-
+    """Basic module for matmul."""
     def __init__(self, ) -> None:
+        """Init the Matmul object."""
         super().__init__()

     def forward(self, X, Y):

@@ -108,6 +109,7 @@ def _post_init_for_w4a8(self):
         )

     def post_init(self):
+        """Initialization for W4A8 usage"""
         if self.enable_w4a8:
             self._post_init_for_w4a8()
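
Using the newly documented Matmul module is straightforward. A minimal sketch, assuming the forward method (not shown in this hunk) simply delegates to torch.matmul:

import torch

class Matmul(torch.nn.Module):
    """Basic module for matmul."""

    def __init__(self) -> None:
        """Init the Matmul object."""
        super().__init__()

    def forward(self, X, Y):
        # Assumed behavior (not shown in the diff): a plain matmul kept as a module
        # so call sites can later be swapped for a quantized counterpart.
        return torch.matmul(X, Y)

x = torch.randn(2, 3)
y = torch.randn(3, 4)
print(Matmul()(x, y).shape)  # torch.Size([2, 4])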
