Commit f69f38e

add docstring and fix typo

Signed-off-by: Xin He <xinhe3@habana.ai>
1 parent: d198b96

4 files changed (+14, -3 lines)

neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/xpu/xpu_quantized_func_wrapper.py

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ def get_default_quantized_func(self):
     def __call__(self, input, other, out=None, out_dtype=torch.bfloat16, scale_input_inv=None, scale_other_inv=None):
         # TODO FSW-11669 modify call arguments once fp8_gemm_v2 is implemented
         # Current ipex ops fp8_gemm API is misaligned to hpu ops API.
-        # below args are according to ipex ops to allow basic unit testing, but won't support intergartion in
+        # below args are according to ipex ops to allow basic unit testing, but won't support integration in
         # INC patched modules.
         return self._quantized_func_(input,
                                      torch.bfloat16,
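
For reference, the ipex-style argument order mentioned in the comment above can be exercised on its own. This is an illustrative sketch only: FakeQuantizedMatmul is a hypothetical stand-in that mirrors the wrapper's __call__ signature and falls back to a plain matmul; it is not the real _quantized_func_ or the ipex fp8_gemm op.

import torch

# Hypothetical stand-in mirroring the ipex-style signature shown in the diff.
class FakeQuantizedMatmul:
    def __call__(self, input, other, out=None, out_dtype=torch.bfloat16,
                 scale_input_inv=None, scale_other_inv=None):
        # Apply the inverse scales (dequantize), run a plain matmul, cast to out_dtype.
        a = input.float() * (scale_input_inv if scale_input_inv is not None else 1.0)
        b = other.float() * (scale_other_inv if scale_other_inv is not None else 1.0)
        return torch.matmul(a, b).to(out_dtype)

x = torch.randn(4, 8)
w = torch.randn(8, 16)
out = FakeQuantizedMatmul()(x, w, out_dtype=torch.bfloat16,
                            scale_input_inv=torch.tensor(2.0),
                            scale_other_inv=torch.tensor(0.5))
print(out.shape, out.dtype)  # torch.Size([4, 16]) torch.bfloat16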

neural_compressor/torch/algorithms/layer_wise/utils.py

Lines changed: 7 additions & 0 deletions
@@ -256,6 +256,12 @@ def load_module(model, module_name, path, device="cpu"):
         set_module_tensor_to_device(model, param_name, device, value)

 def load_first_layer_only(user_model, model_name):
+    """load first layer only.
+
+    Args:
+        user_model (torch.nn.Module): input model
+        model_name (str): model name or path
+    """
     for name, m in user_model.named_modules():
         if ('layers' not in name or 'layers.0' in name) and len(name) > 0 and len(list(m.named_children())) == 0:
             load_module(user_model, name, get_path(model_name), device="hpu" if is_hpex_available() else "cpu")

@@ -331,6 +337,7 @@ def hook(module, input, output):


 def clean_module_weight(module):
+    """Clean module weight."""
     hpu_available = is_hpex_available()
     """Clean module weight."""
     if isinstance(module, QDQLayer):
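
The name filter in load_first_layer_only keeps leaf modules that sit outside the decoder layer stack (embeddings, final norm, lm_head) plus those inside layers.0. A small self-contained check of that string predicate, using hypothetical module names (the real function additionally requires the module to be a leaf, i.e. to have no named children):

# Hypothetical module names; only the string predicate from load_first_layer_only is checked.
names = [
    "model.embed_tokens",               # outside the layer stack -> loaded
    "model.layers.0.self_attn.q_proj",  # first decoder layer     -> loaded
    "model.layers.1.self_attn.q_proj",  # later decoder layer     -> skipped
    "model.norm",                       # outside the layer stack -> loaded
]
kept = [n for n in names if ("layers" not in n or "layers.0" in n) and len(n) > 0]
print(kept)  # ['model.embed_tokens', 'model.layers.0.self_attn.q_proj', 'model.norm']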

neural_compressor/torch/algorithms/weight_only/gptq.py

Lines changed: 3 additions & 1 deletion
@@ -224,7 +224,8 @@ def __init__(
             use_max_length (bool): set all sequence length to be same length.
             max_seq_length (int): the same length of all sequence length.
             dataloader: an iterable containing calibration datasets, contains (inputs, targets)
-            use_layer_wise (bool): Enables quantize model per layer. Defaults to False.
+            use_layer_wise (bool): Whether to load weights and quantize per layer. Defaults to False.
+            use_block_wise (bool): Whether to load weights and quantize per block. Defaults to False.
             model_path (str): Model path that is used to load state_dict per layer.
             quant_lm_head (bool): Indicates whether quantize the lm_head layer in transformers. Defaults to False.
             device (str): cpu or cuda.

@@ -288,6 +289,7 @@ def prepare_layer_wise(self, model_path, indicated_layers=None, layerwise=True):
                 If None, all layers will be considered.
                 Layers not specified in this list will be retained in memory
                 but will not undergo quantization.
+            layerwise (bool): Whether to apply layer-wise quantization.
         """
         import os
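
The new use_block_wise option documents a coarser granularity than use_layer_wise. A toy illustration of the difference in what gets visited (not INC's actual traversal code), assuming a model made of two blocks that each contain two Linear layers:

import torch

# Toy two-block "model", for illustration only.
class Block(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(8, 8)
        self.fc2 = torch.nn.Linear(8, 8)

model = torch.nn.ModuleList([Block() for _ in range(2)])

# layer-wise granularity: every leaf Linear is loaded and quantized on its own
per_layer = [name for name, mod in model.named_modules() if isinstance(mod, torch.nn.Linear)]
# block-wise granularity: one whole block is handled at a time
per_block = [name for name, mod in model.named_children()]

print(per_layer)  # ['0.fc1', '0.fc2', '1.fc1', '1.fc2']
print(per_block)  # ['0', '1']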

neural_compressor/torch/algorithms/weight_only/modules.py

Lines changed: 3 additions & 1 deletion
@@ -29,8 +29,9 @@
 from .utility import quant_tensor

 class Matmul(torch.nn.Module):
-
+    """Basic module for matmul."""
     def __init__(self, ) -> None:
+        """Init the Matmul object."""
         super().__init__()

     def forward(self, X, Y):

@@ -108,6 +109,7 @@ def _post_init_for_w4a8(self):
         )

     def post_init(self):
+        """Initialization for W4A8 usage"""
         if self.enable_w4a8:
             self._post_init_for_w4a8()
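
Using the newly documented Matmul module is straightforward. A minimal sketch, assuming the forward method (not shown in this hunk) simply delegates to torch.matmul:

import torch

class Matmul(torch.nn.Module):
    """Basic module for matmul."""

    def __init__(self) -> None:
        """Init the Matmul object."""
        super().__init__()

    def forward(self, X, Y):
        # Assumed behavior (not shown in the diff): a plain matmul kept as a module
        # so call sites can later be swapped for a quantized counterpart.
        return torch.matmul(X, Y)

x = torch.randn(2, 3)
y = torch.randn(3, 4)
print(Matmul()(x, y).shape)  # torch.Size([2, 4])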
