This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Feature/support older intel mac book pro with gcc 13 #1085

Draft · wants to merge 16 commits into main
2 changes: 1 addition & 1 deletion docker/README.md
@@ -24,7 +24,7 @@ docker compose build
OR
```bash
docker pull intel/ai-tools:itrex-1.3.0
docker pull intel/ai-tools:itrex-devel-1.3.0
docker pull intel/ai-tools:itrex-1.3.0-devel
```

Contributor: Thanks for the correction. I'm not familiar with Docker, but it seems that you are right. To be confirmed by @tylertitsworth?

## Use Docker Image
@@ -601,7 +601,7 @@ def forward(self, sequence_output, pooled_output):

class BertPreTrainedModel(nn.Module):
""" An abstract class to handle weights initialization and
a simple interface for dowloading and loading pretrained models.
a simple interface for downloading and loading pretrained models.
"""
def __init__(self, config, *inputs, **kwargs):
super(BertPreTrainedModel, self).__init__()
@@ -663,7 +663,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, state_dict=None, cache_d
. `model.chkpt` a TensorFlow checkpoint
from_tf: should we load the weights from a locally saved TensorFlow checkpoint
cache_dir: an optional path to a folder in which the pre-trained models will be cached.
state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models
state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models
*inputs, **kwargs: additional input for the specific Bert class
(ex: num_labels for BertForSequenceClassification)
"""
@@ -366,7 +366,7 @@ inline float get_mxfp_maxnorm(const JBLAS_DTYPE t, int ebits, int mantissa_bits)
return max_norm;
}

#ifndef _WIN32
#if !defined(_WIN32) && !defined(__APPLE__)
static void request_perm_xtile_data() {
unsigned long bitmask;
long rc;
@@ -1228,7 +1228,7 @@ class PaddingTransInterleaveCvt : protected xbyak::JitAvx512f {
jmp(ptr[reg_tmp + reg_tmp2 * sizeof(void*)], T_NEAR); // switch(rows-iterrow) ...
align(sizeof(intptr_t));
L(l_tail_tbl);
db(reinterpret_cast<uintptr_t>(nullptr), sizeof(intptr_t)); // case 0 should never occur
db(reinterpret_cast<uint64_t>(nullptr), sizeof(intptr_t)); // case 0 should never occur
for (int i = 1; i < trans_cell; ++i) putL(l_tail_case[i]);

for (int m_tail = 1; m_tail < trans_cell; ++m_tail) { // case (m_tail):
@@ -65,7 +65,7 @@ def __import_package(self, model_type):
elif model_type == "mistral":
import intel_extension_for_transformers.llm.runtime.graph.mistral_cpp as cpp_model
else:
raise TypeError("Unspported model type {}!".format(model_type))
raise TypeError("Unsupported model type {}!".format(model_type))
self.module = cpp_model

@staticmethod
@@ -134,7 +134,7 @@ def init_from_bin(self, model_type, model_path, **generate_kwargs):
import platform
sys_platform = platform.platform().lower()
if threads is None:
if "windows" in sys_platform:
if "windows" in sys_platform or "macos" in sys_platform:
cpu_count = os.cpu_count()
generate_kwargs["threads"] = int(cpu_count)
else:
@@ -212,7 +212,7 @@ def eos_token_id(self):
if self.model_type == 'qwen':
return self.tokenizer.special_tokens['<|endoftext|>']
return self.tokenizer.eos_token_id

def pad_token_id(self):
if self.tokenizer.pad_token_id == None:
if self.batch_size == 1:
@@ -31,7 +31,7 @@ else ()
target_link_libraries(ne_layers PUBLIC Threads::Threads jblas::jblas ne_vec)
endif()

if(NOT WIN32)
if(NOT WIN32 AND NOT APPLE)
target_link_libraries(ne_layers PUBLIC rt)
endif()

@@ -54,7 +54,7 @@ function(add_test_target src)
target_link_options(${test_target} PRIVATE -fsanitize=address)
target_include_directories(${test_target} PUBLIC .)
target_link_libraries(${test_target} PUBLIC Threads::Threads jblas::jblas ne_vec)
if(NOT WIN32)
if(NOT WIN32 AND NOT APPLE)
target_link_libraries(${test_target} PUBLIC rt)
endif()
add_test(NAME ${test_target} COMMAND ${test_target})
@@ -79,8 +79,8 @@ graph LR;

We need to implement corresponding serialization methods from pytorch format, which is mainly divided into the following three steps.

## 1.1. Hyperparamters
The term **"hyperparamters"** describes a value that is used to configure the behavior of a large language model; this is in contrast to the model's parameters, which are the weight that were derived in the training process that was used to create the model. Each model defines its own hyperparameter structure that defines the hyperparameter values accepted by that model. Valid ITREX graph files must list these values in the correct order, and each value must be represented using the correct data type. Although hyperparameters are different across models, some attributes appear in the hyperparameters for most models:
## 1.1. Hyperparameters
The term **"hyperparameters"** describes a value that is used to configure the behavior of a large language model; this is in contrast to the model's parameters, which are the weight that were derived in the training process that was used to create the model. Each model defines its own hyperparameter structure that defines the hyperparameter values accepted by that model. Valid ITREX graph files must list these values in the correct order, and each value must be represented using the correct data type. Although hyperparameters are different across models, some attributes appear in the hyperparameters for most models:
- n_vocab: the size of the model's vocabulary
- n_embd: the size of the model's " embedding layer", which is used during prompt ingestion.
- n_layer: the number of layers in the model; each layer represents a set of weights.
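
To make the ordering and typing requirement concrete, here is a minimal sketch of packing such hyperparameters into a binary file; the field order and helper function are simplified assumptions for illustration, not the exact ITREX graph layout:

```python
import struct

def write_hyperparameters(fout, n_vocab: int, n_embd: int, n_layer: int) -> None:
    # Write each value in a fixed order with a fixed-width type, mirroring
    # the requirement above; real models serialize many more fields.
    fout.write(struct.pack("i", n_vocab))
    fout.write(struct.pack("i", n_embd))
    fout.write(struct.pack("i", n_layer))

with open("model_hparams.bin", "wb") as fout:
    write_hyperparameters(fout, n_vocab=32000, n_embd=4096, n_layer=32)
```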
@@ -328,7 +328,7 @@ Most of our model examples only support single prompt processing. You need to ad
+set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(${TARGET} PUBLIC ne_layers jblas::jblas)
```
and and new_model to [models_CMakeLists.txt](https://github.com/intel/intel-extension-for-transformers/blob/main/intel_extension_for_transformers/llm/runtime/graph/models/CMakeLists.txt).
and new_model to [models_CMakeLists.txt](https://github.com/intel/intel-extension-for-transformers/blob/main/intel_extension_for_transformers/llm/runtime/graph/models/CMakeLists.txt).
```diff
add_subdirectory(opt)
add_subdirectory(bloom)
@@ -33,11 +33,11 @@ int32_t get_num_physical_cores() {
#elif defined(__APPLE__) && defined(__MACH__)
int32_t num_physical_cores;
size_t len = sizeof(num_physical_cores);
int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
int result = syscall(SYS_sysctlbyname, "hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
if (result == 0) {
return num_physical_cores;
}
result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
result = syscall(SYS_sysctlbyname, "hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
if (result == 0) {
return num_physical_cores;
}
@@ -165,7 +165,7 @@ def guessed(model: 'LazyModel') -> 'Params':
n_mult=256,
n_head=n_embd // 128,
n_head_kv=n_embd // 128,
f_norm_eps=1e-5,
rms_norm_eps=1e-5,
n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model),
)

@@ -203,7 +203,7 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: Path) -> 'Params':
)

# LLaMA v2 70B params.json
# {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8,
# {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8,
# "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1}
@staticmethod
def loadOriginalParamsJson(model: 'LazyModel', config_path: Path) -> 'Params':
@@ -230,8 +230,8 @@ def loadOriginalParamsJson(model: 'LazyModel', config_path: Path) -> 'Params':
n_head=n_head,
n_head_kv=n_head_kv,
ffn_hidden_size=ffn_hidden_size,
bos_token_id = bos_token_id,
eos_token_id = eos_token_id,
bos_token_id=bos_token_id,
eos_token_id=eos_token_id,
)

@staticmethod
@@ -278,7 +278,7 @@ def __init__(self, fname_tokenizer: Path, params_vocab_size: int, fname_added_to
def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:
tokenizer = self.sentencepiece_tokenizer
for i in range(self.params_vocab_size):
text: bytes
text: bytes
if i < tokenizer.vocab_size():
if tokenizer.is_unknown(i):
text = " \u2047 ".encode("utf-8")
@@ -1086,7 +1086,7 @@ def write_file_header(self, params: Params, file_type: NEFileType) -> None:
self.fout.write(struct.pack("f", params.rope_theta))
self.fout.write(struct.pack("f", params.rope_scale))

# TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json
# TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json
# but bos_token_id = 1 in llama.cpp
self.fout.write(struct.pack("i", params.bos_token_id))
self.fout.write(struct.pack("i", params.eos_token_id))
@@ -1108,10 +1108,9 @@ def write_vocab(self, vocab: Vocab) -> None:

@staticmethod
def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0)
of = OutputFile(fname_out)
params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0, file_type=NEFileType.AllF32)
of = OutputFile(fname_out)
of.write_file_header(params)
of.write_file_header(params, file_type=NEFileType.AllF32)
of.write_vocab(vocab)
of.fout.close()

@@ -164,7 +164,7 @@ def guessed(model: 'LazyModel') -> 'Params':
n_mult=256,
n_head=n_embd // 128,
n_head_kv=n_embd // 128,
f_norm_eps=1e-5,
rms_norm_eps=1e-5,
n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model),
)

@@ -192,6 +192,7 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: Path) -> 'Params':
ffn_hidden_size=ffn_hidden_size,
rms_norm_eps=rms_norm_eps,
rope_theta=rope_theta,
rope_scale=rope_scale,
)

# LLaMA v2 70B params.json
@@ -1064,8 +1065,8 @@ def write_file_header(self, params: Params, file_type: NEFileType) -> None:

self.fout.write(
struct.pack("i", 1)
)
# TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json
)
# TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json
# but bos_token_id = 1 in llama.cpp
self.fout.write(struct.pack("i", 2))

@@ -1087,10 +1088,9 @@ def write_vocab(self, vocab: Vocab) -> None:

@staticmethod
def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0)
of = OutputFile(fname_out)
params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0, file_type=NEFileType.AllF32)
of = OutputFile(fname_out)
of.write_file_header(params)
of.write_file_header(params, file_type=NEFileType.AllF32)
of.write_vocab(vocab)
of.fout.close()

@@ -1,5 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.1.0+cpu
torch==2.1.0+cpu ; sys_platform != 'darwin'
torch==2.1.0 ; sys_platform == 'darwin'
transformers
numpy
sentencepiece
3 changes: 2 additions & 1 deletion requirements.txt
@@ -4,6 +4,7 @@ py-cpuinfo
setuptools>=65
setuptools_scm[toml]>=6.2
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.1.0+cpu
torch==2.1.0+cpu ; sys_platform != 'darwin'
torch==2.1.0 ; sys_platform == 'darwin'
accelerate
optimum-intel
21 changes: 12 additions & 9 deletions setup.py
@@ -71,7 +71,7 @@ class CMakeBuild(build_ext):

@staticmethod
def _is_target_file(file_name: str) -> bool:
if file_name.endswith(".dll") or file_name.endswith(".exe") or file_name.endswith(".pyd"):
if file_name.endswith(".dll") or file_name.endswith(".exe") or file_name.endswith(".pyd") or file_name.endswith(".dylib"):
return True
if file_name.endswith(".so") or ".so." in file_name:
return True
@@ -234,21 +234,24 @@ def check_submodules():
end = time.time()
print(f' --- Submodule initialization took {end - start:.2f} sec')
except Exception:
print(' --- Submodule initalization failed')
print(' --- Submodule initialization failed')
print('Please run:\n\tgit submodule update --init --recursive')
sys.exit(1)


if __name__ == '__main__':
ext_modules = [CMakeExtension(
"intel_extension_for_transformers.qbits", 'intel_extension_for_transformers/llm/operator/csrc', lib_only=True)]
ext_modules = []
if sys.platform != "darwin":
ext_modules.append(CMakeExtension("intel_extension_for_transformers.qbits",
"intel_extension_for_transformers/llm/operator/csrc", lib_only=True))
if not SKIP_RUNTIME:
check_submodules()
ext_modules.extend([
CMakeExtension("intel_extension_for_transformers.neural_engine_py", "intel_extension_for_transformers/llm/runtime/deprecated/"),
CMakeExtension("intel_extension_for_transformers.llm.runtime.graph.mpt_cpp", "intel_extension_for_transformers/llm/runtime/graph/"),
])
cmdclass={'build_ext': CMakeBuild}
ext_modules.append(CMakeExtension("intel_extension_for_transformers.llm.runtime.graph.mpt_cpp",
"intel_extension_for_transformers/llm/runtime/graph/"))
if sys.platform != "darwin":
ext_modules.append(CMakeExtension("intel_extension_for_transformers.neural_engine_py",
"intel_extension_for_transformers/llm/runtime/deprecated/"))
cmdclass = {'build_ext': CMakeBuild}

setup(
name="intel-extension-for-transformers",