Mohamed Mekkouri committed on
Commit
350f7e0
·
1 Parent(s): 95d28ad

Add Builds

Browse files
CMakeLists.txt CHANGED
@@ -1,5 +1,5 @@
1
  cmake_minimum_required(VERSION 3.26)
2
- project(gptoss_kernels LANGUAGES CXX)
3
 
4
  set(CMAKE_OSX_DEPLOYMENT_TARGET "15.0" CACHE STRING "Minimum macOS deployment version")
5
 
@@ -89,8 +89,8 @@ endif()
89
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/compile-metal.cmake)
90
 
91
  define_gpu_extension_target(
92
- _gptoss_kernels_931bc1b_dirty
93
- DESTINATION _gptoss_kernels_931bc1b_dirty
94
  LANGUAGE ${GPU_LANG}
95
  SOURCES ${SRC}
96
  COMPILE_FLAGS ${GPU_FLAGS}
@@ -100,5 +100,5 @@ define_gpu_extension_target(
100
 
101
  # Compile Metal shaders if any were found
102
  if(ALL_METAL_SOURCES)
103
- compile_metal_shaders(_gptoss_kernels_931bc1b_dirty "${ALL_METAL_SOURCES}")
104
  endif()
 
1
  cmake_minimum_required(VERSION 3.26)
2
+ project(gptoss_kernels LANGUAGES CXX C OBJC OBJCXX)
3
 
4
  set(CMAKE_OSX_DEPLOYMENT_TARGET "15.0" CACHE STRING "Minimum macOS deployment version")
5
 
 
89
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/compile-metal.cmake)
90
 
91
  define_gpu_extension_target(
92
+ _gptoss_kernels_3f5b0c9
93
+ DESTINATION _gptoss_kernels_3f5b0c9
94
  LANGUAGE ${GPU_LANG}
95
  SOURCES ${SRC}
96
  COMPILE_FLAGS ${GPU_FLAGS}
 
100
 
101
  # Compile Metal shaders if any were found
102
  if(ALL_METAL_SOURCES)
103
+ compile_metal_shaders(_gptoss_kernels_3f5b0c9 "${ALL_METAL_SOURCES}")
104
  endif()
build.toml CHANGED
@@ -14,24 +14,42 @@ depends = ["torch"]
14
  backend = "metal"
15
 
16
  src = [
17
- "gptoss_kernels/source/accumulate.metal",
18
- "gptoss_kernels/source/expert_routing_metadata.metal",
19
- "gptoss_kernels/source/metal.m",
20
- "gptoss_kernels/source/scatter.metal",
21
- "gptoss_kernels/source/topk.metal",
22
- "gptoss_kernels/source/embeddings.metal",
23
- "gptoss_kernels/source/metal-kernels.c",
24
- "gptoss_kernels/source/random.metal",
25
- "gptoss_kernels/source/sdpa.metal",
26
- "gptoss_kernels/source/matmul.metal",
27
- "gptoss_kernels/source/rmsnorm.metal",
28
- "gptoss_kernels/source/sample.metal",
29
- "gptoss_kernels/source/moematmul.metal",
30
- "gptoss_kernels/source/convert.metal",
31
- "gptoss_kernels/source/rope.metal",
32
- "gptoss_kernels/source/gather_and_accumulate.metal",
33
- "gptoss_kernels/source/tensor_wrappers.cpp",
34
- "gptoss_kernels/source/log.c",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  ]
36
 
37
  include = ["gptoss_kernels/source/include", "gptoss_kernels/include", "."]
 
14
  backend = "metal"
15
 
16
  src = [
17
+ "gptoss_kernels/include/gpt-oss.h",
18
+ "gptoss_kernels/include/gpt-oss/types.h",
19
+ "gptoss_kernels/include/gpt-oss/macros.h",
20
+ "gptoss_kernels/include/gpt-oss/functions.h",
21
+ "gptoss_kernels/source/accumulate.metal",
22
+ "gptoss_kernels/source/log.c",
23
+ "gptoss_kernels/source/expert_routing_metadata.metal",
24
+ "gptoss_kernels/source/metal.m",
25
+ "gptoss_kernels/source/scatter.metal",
26
+ "gptoss_kernels/source/topk.metal",
27
+ "gptoss_kernels/source/embeddings.metal",
28
+ "gptoss_kernels/source/metal-kernels.c",
29
+ "gptoss_kernels/source/tensor_wrappers.cpp",
30
+ "gptoss_kernels/source/random.metal",
31
+ "gptoss_kernels/source/sdpa.metal",
32
+ "gptoss_kernels/source/matmul.metal",
33
+ "gptoss_kernels/source/rmsnorm.metal",
34
+ "gptoss_kernels/source/sample.metal",
35
+ "gptoss_kernels/source/moematmul.metal",
36
+ "gptoss_kernels/source/convert.metal",
37
+ "gptoss_kernels/source/rope.metal",
38
+ "gptoss_kernels/source/gather_and_accumulate.metal",
39
+ "gptoss_kernels/source/include/internal/uuid.h",
40
+ "gptoss_kernels/source/include/internal/metal.hpp",
41
+ "gptoss_kernels/source/include/internal/datatype.h",
42
+ "gptoss_kernels/source/include/internal/rng.h",
43
+ "gptoss_kernels/source/include/internal/rng.hpp",
44
+ "gptoss_kernels/source/include/internal/log.h",
45
+ "gptoss_kernels/source/include/internal/macros.h",
46
+ "gptoss_kernels/source/include/internal/storage.h",
47
+ "gptoss_kernels/source/include/internal/model.h",
48
+ "gptoss_kernels/source/include/internal/math.h",
49
+ "gptoss_kernels/source/include/internal/metal.h",
50
+ "gptoss_kernels/source/include/internal/kernel-args.h",
51
+ "gptoss_kernels/source/include/internal/datatype.hpp",
52
+ "gptoss_kernels/source/include/internal/metal-kernels.h",
53
  ]
54
 
55
  include = ["gptoss_kernels/source/include", "gptoss_kernels/include", "."]
build/torch28-metal-aarch64-darwin/gptoss_kernels/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def f32_bf16w_matmul(input: torch.Tensor, weight_bf16: torch.Tensor, bias_bf16: torch.Tensor, output: torch.Tensor, num_tokens: int, num_cols: int, num_rows: int, threadgroup_size: int) -> None:
5
+ ops.f32_bf16w_matmul_torch(input, weight_bf16, bias_bf16, output, num_tokens, num_cols, num_rows, threadgroup_size)
6
+ return output
7
+
8
+ __all__ = ["f32_bf16w_matmul"]
build/torch28-metal-aarch64-darwin/gptoss_kernels/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (909 Bytes). View file
 
build/torch28-metal-aarch64-darwin/gptoss_kernels/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (593 Bytes). View file
 
build/torch28-metal-aarch64-darwin/gptoss_kernels/_gptoss_kernels_b5918a0_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bf1d8a4e3db56514354880d5f32f6604c25ef5a3951843ccdeb90bc5ef1db30
3
+ size 125528
{torch-ext → build/torch28-metal-aarch64-darwin}/gptoss_kernels/_ops.py RENAMED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _gptoss_kernels_931bc1b_dirty
3
- ops = torch.ops._gptoss_kernels_931bc1b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_gptoss_kernels_931bc1b_dirty::{op_name}"
 
1
  import torch
2
+ from . import _gptoss_kernels_b5918a0_dirty
3
+ ops = torch.ops._gptoss_kernels_b5918a0_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_gptoss_kernels_b5918a0_dirty::{op_name}"
build/torch28-metal-aarch64-darwin/gptoss_kernels/test.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import _gptoss_kernels_931bc1b_dirty
2
+ import torch
3
+
4
+ print(dir(_gptoss_kernels_931bc1b_dirty))
5
+
6
+ from gptoss_kernels import _gptoss_kernels_931bc1b_dirty
build/torch29-metal-aarch64-darwin/gptoss_kernels/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def f32_bf16w_matmul(input: torch.Tensor, weight_bf16: torch.Tensor, bias_bf16: torch.Tensor, output: torch.Tensor, num_tokens: int, num_cols: int, num_rows: int, threadgroup_size: int) -> None:
5
+ ops.f32_bf16w_matmul_torch(input, weight_bf16, bias_bf16, output, num_tokens, num_cols, num_rows, threadgroup_size)
6
+ return output
7
+
8
+ __all__ = ["f32_bf16w_matmul"]
build/torch29-metal-aarch64-darwin/gptoss_kernels/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (909 Bytes). View file
 
build/torch29-metal-aarch64-darwin/gptoss_kernels/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (593 Bytes). View file
 
torch-ext/gptoss_kernels/_gptoss_kernels_931bc1b_dirty.abi3.so → build/torch29-metal-aarch64-darwin/gptoss_kernels/_gptoss_kernels_b5918a0_dirty.abi3.so RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31cddc1925c6c7901a5619ff55420ae6249d2c48de202a23a7c4534e4ccdcd4c
3
  size 126536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:257007386e3cbb53fa1bab940371899d8511fb1e429cfc476a9aea87ace35aab
3
  size 126536
build/torch29-metal-aarch64-darwin/gptoss_kernels/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _gptoss_kernels_b5918a0_dirty
3
+ ops = torch.ops._gptoss_kernels_b5918a0_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_gptoss_kernels_b5918a0_dirty::{op_name}"
build/torch29-metal-aarch64-darwin/gptoss_kernels/test.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import _gptoss_kernels_931bc1b_dirty
2
+ import torch
3
+
4
+ print(dir(_gptoss_kernels_931bc1b_dirty))
5
+
6
+ from gptoss_kernels import _gptoss_kernels_931bc1b_dirty
cmake/compile-metal.cmake CHANGED
@@ -4,12 +4,22 @@ function(compile_metal_shaders TARGET_NAME METAL_SOURCES)
4
  find_program(METAL_COMPILER xcrun REQUIRED)
5
 
6
  # Set Metal compiler flags
7
- set(METAL_FLAGS "-std=metal3.0" "-O2")
8
 
9
  # Output directory for compiled metallib
10
  set(METALLIB_OUTPUT_DIR "${CMAKE_BINARY_DIR}/metallib")
11
  file(MAKE_DIRECTORY ${METALLIB_OUTPUT_DIR})
12
 
 
 
 
 
 
 
 
 
 
 
13
  # Separate .metal files from .h files and compile .metal files to .air
14
  set(AIR_FILES)
15
  set(METAL_FILES)
 
4
  find_program(METAL_COMPILER xcrun REQUIRED)
5
 
6
  # Set Metal compiler flags
7
+ set(METAL_FLAGS "-std=metal3.2" "-O2")
8
 
9
  # Output directory for compiled metallib
10
  set(METALLIB_OUTPUT_DIR "${CMAKE_BINARY_DIR}/metallib")
11
  file(MAKE_DIRECTORY ${METALLIB_OUTPUT_DIR})
12
 
13
+ set(METAL_INCLUDE_DIRS
14
+ "${CMAKE_SOURCE_DIR}/gptoss_kernels/source/include"
15
+ "${CMAKE_SOURCE_DIR}/gptoss_kernels/include"
16
+ "${CMAKE_SOURCE_DIR}/."
17
+ )
18
+
19
+ foreach(INC ${METAL_INCLUDE_DIRS})
20
+ list(APPEND METAL_FLAGS "-I${INC}")
21
+ endforeach()
22
+
23
  # Separate .metal files from .h files and compile .metal files to .air
24
  set(AIR_FILES)
25
  set(METAL_FILES)
flake.lock CHANGED
@@ -98,11 +98,11 @@
98
  ]
99
  },
100
  "locked": {
101
- "lastModified": 1761991868,
102
  "narHash": "sha256-+csvkWC9jC4mwq1LNfK4O6m3Qg4dCCXjP5JGdPa3TEo=",
103
  "owner": "huggingface",
104
  "repo": "kernel-builder",
105
- "rev": "79cbfcdfde82c8847551f67f4b951a410794a5c6",
106
  "type": "github"
107
  },
108
  "original": {
 
98
  ]
99
  },
100
  "locked": {
101
+ "lastModified": 1761998455,
102
  "narHash": "sha256-+csvkWC9jC4mwq1LNfK4O6m3Qg4dCCXjP5JGdPa3TEo=",
103
  "owner": "huggingface",
104
  "repo": "kernel-builder",
105
+ "rev": "25ea190b1dca356f8bec2ec7cb92507b11ca62a1",
106
  "type": "github"
107
  },
108
  "original": {
flake.nix CHANGED
@@ -2,7 +2,7 @@
2
  description = "Flake for Torch kernel extension";
3
 
4
  inputs = {
5
- kernel-builder.url = "github:huggingface/kernel-builder?ref=metal_kernels";
6
  };
7
 
8
  outputs = { self, kernel-builder, }:
 
2
  description = "Flake for Torch kernel extension";
3
 
4
  inputs = {
5
+ kernel-builder.url = "github:huggingface/kernel-builder/metal_kernels";
6
  };
7
 
8
  outputs = { self, kernel-builder, }:
setup.py CHANGED
@@ -108,7 +108,7 @@ setup(
108
  name="gptoss_kernels",
109
  # The version is just a stub, it's not used by the final build artefact.
110
  version="0.1.0",
111
- ext_modules=[CMakeExtension("gptoss_kernels._gptoss_kernels_931bc1b_dirty")],
112
  cmdclass={"build_ext": CMakeBuild},
113
  packages=find_packages(where="torch-ext", include=["gptoss_kernels*"]),
114
  package_dir={"": "torch-ext"},
 
108
  name="gptoss_kernels",
109
  # The version is just a stub, it's not used by the final build artefact.
110
  version="0.1.0",
111
+ ext_modules=[CMakeExtension("gptoss_kernels._gptoss_kernels_3f5b0c9")],
112
  cmdclass={"build_ext": CMakeBuild},
113
  packages=find_packages(where="torch-ext", include=["gptoss_kernels*"]),
114
  package_dir={"": "torch-ext"},