{"id":528,"date":"2024-01-18T13:01:50","date_gmt":"2024-01-18T04:01:50","guid":{"rendered":"https:\/\/elosove.com\/?p=528"},"modified":"2024-01-18T13:01:50","modified_gmt":"2024-01-18T04:01:50","slug":"deepspeedv0-11-2%e3%82%92windows%e3%81%ab%e3%82%a4%e3%83%b3%e3%82%b9%e3%83%88%e3%83%bc%e3%83%ab%e3%81%99%e3%82%8b%e6%96%b9%e6%b3%95","status":"publish","type":"post","link":"https:\/\/elosove.com\/?p=528","title":{"rendered":"DeepSpeed[v0.11.2]\u3092Windows\u306b\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u65b9\u6cd5"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\">\u4eca\u56de\u306fDeepSpeed\u306ev0.11.2\u3092\u30d3\u30eb\u30c9&amp;\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u65b9\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u30d3\u30eb\u30c9\u304c\u3081\u3093\u3069\u304f\u3055\u3044\u4eba\u306fv0.11.1\u306ewhl\u30d5\u30a1\u30a4\u30eb\u304c\u516c\u958b\u3055\u308c\u3066\u3044\u308b\u306e\u3067DL\u3057\u3066pip install\u3057\u3066\u304f\u3060\u3055\u3044\u3002[<a href=\"https:\/\/github.com\/oobabooga\/text-generation-webui\/files\/13593455\/deepspeed-0.11.1%2Be9503fe-cp311-cp311-win_amd64.rar.zip\">deepspeed-0.11.1+e9503fe-cp311-cp311-win_amd64.rar.zip<\/a>]<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">(\u53c2\u8003\u60c5\u5831 : <a href=\"https:\/\/github.com\/oobabooga\/text-generation-webui\/issues\/4734\" data-type=\"link\" data-id=\"https:\/\/github.com\/oobabooga\/text-generation-webui\/issues\/4734\">Installing DeepSpeed on Windows<\/a>)<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">(v0.8.3\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u65b9\u6cd5\u306f<a href=\"https:\/\/elosove.com\/?p=516\" target=\"_blank\" rel=\"noreferrer noopener\">\u3053\u3061\u3089<\/a>)<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u30c6\u30b9\u30c8\u74b0\u5883<\/h2>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">OS : Windows 10<\/p>\n\n\n\n<p 
class=\"wp-block-paragraph\">CUDA : 11.8<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">Visual C++ build tools : Visual Studio 2022 community<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">Python : 3.11.7 (3.10\u7cfb\u3067\u3082\u52d5\u304f\u305d\u3046, Anaconda\u74b0\u5883\u4e0b)<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">PyTorch : 2.1.1<\/p>\n<\/blockquote>\n\n\n\n<p class=\"wp-block-paragraph\">\u203bCUDA\u7cfb\u306e\u30d1\u30b9(CUDA_HOME \/ CUDA_PATH)\u306f\u901a\u3063\u3066\u308b\u524d\u63d0\u3067\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u3000<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">1 : DeepSpeed\u306eclone<\/h3>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">git clone <strong>--branch v0.11.2<\/strong> https:\/\/github.com\/microsoft\/DeepSpeed.git<\/p>\n<\/blockquote>\n\n\n\n<p class=\"wp-block-paragraph\">\u3000<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">2 : build_win.bat\u306e\u7de8\u96c6<\/h3>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">set DS_BUILD_EVOFORMER_ATTN=0<\/p>\n<\/blockquote>\n\n\n\n<p class=\"wp-block-paragraph\">\u3092&quot;set DS_BUILD_SPARSE_ATTN=0&quot;\u306e\u5f8c\u308d\u306b\u8ffd\u52a0<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u3000<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">3 : \u30b3\u30fc\u30c9\u306e\u4fee\u6b63<\/h3>\n\n\n\n<p class=\"has-black-color has-text-color has-link-color wp-elements-48a280452dec6fcf7cf0df4a4a63ea71 wp-block-paragraph\">\u30fb<strong>DeepSpeed\/csrc\/quantization\/pt_binding.cpp<\/strong><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">244-250\u884c\u76ee\u3092\u4e0b\u8a18\u306b\u4fee\u6b63<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" 
style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#1E1E1E\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"    std::vector&lt;int64_t&gt; sz_vector(input_vals.sizes().begin(), input_vals.sizes().end());\n    sz_vector[sz_vector.size() - 1] = sz_vector.back() \/ devices_per_node;  \/\/ num of GPU per nodes\n    at::IntArrayRef sz(sz_vector);\n    auto output = torch::empty(sz, output_options);\n\n    const int elems_per_in_tensor = at::numel(input_vals) \/ devices_per_node;\n    const int elems_per_in_group = elems_per_in_tensor \/ (in_groups \/ devices_per_node);\n    const int elems_per_out_group = elems_per_in_tensor \/ out_groups;\" style=\"color:#D4D4D4;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 
2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki dark-plus\" style=\"background-color: #1E1E1E\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #D4D4D4\">    <\/span><span style=\"color: #4EC9B0\">std<\/span><span style=\"color: #D4D4D4\">::<\/span><span style=\"color: #4EC9B0\">vector<\/span><span style=\"color: #D4D4D4\">&lt;<\/span><span style=\"color: #569CD6\">int64_t<\/span><span style=\"color: #D4D4D4\">&gt; <\/span><span style=\"color: #DCDCAA\">sz_vector<\/span><span style=\"color: #D4D4D4\">(<\/span><span style=\"color: #4EC9B0\">input_vals<\/span><span style=\"color: #D4D4D4\">.<\/span><span style=\"color: #DCDCAA\">sizes<\/span><span style=\"color: #D4D4D4\">().<\/span><span style=\"color: #DCDCAA\">begin<\/span><span style=\"color: #D4D4D4\">(), <\/span><span style=\"color: #4EC9B0\">input_vals<\/span><span style=\"color: #D4D4D4\">.<\/span><span style=\"color: #DCDCAA\">sizes<\/span><span style=\"color: #D4D4D4\">().<\/span><span style=\"color: #DCDCAA\">end<\/span><span style=\"color: #D4D4D4\">());<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">    <\/span><span style=\"color: #9CDCFE\">sz_vector<\/span><span style=\"color: #D4D4D4\">[<\/span><span style=\"color: #9CDCFE\">sz_vector<\/span><span style=\"color: #D4D4D4\">.<\/span><span style=\"color: #DCDCAA\">size<\/span><span style=\"color: #D4D4D4\">() - <\/span><span style=\"color: #B5CEA8\">1<\/span><span style=\"color: #D4D4D4\">] = <\/span><span style=\"color: #9CDCFE\">sz_vector<\/span><span style=\"color: #D4D4D4\">.<\/span><span style=\"color: #DCDCAA\">back<\/span><span style=\"color: #D4D4D4\">() \/ devices_per_node;<\/span><span style=\"color: #6A9955\">  \/\/ num of GPU per nodes<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">    <\/span><span style=\"color: #4EC9B0\">at<\/span><span style=\"color: #D4D4D4\">::<\/span><span style=\"color: 
#4EC9B0\">IntArrayRef<\/span><span style=\"color: #D4D4D4\"> <\/span><span style=\"color: #DCDCAA\">sz<\/span><span style=\"color: #D4D4D4\">(<\/span><span style=\"color: #4EC9B0\">sz_vector<\/span><span style=\"color: #D4D4D4\">);<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">    <\/span><span style=\"color: #569CD6\">auto<\/span><span style=\"color: #D4D4D4\"> output = <\/span><span style=\"color: #4EC9B0\">torch<\/span><span style=\"color: #D4D4D4\">::<\/span><span style=\"color: #DCDCAA\">empty<\/span><span style=\"color: #D4D4D4\">(sz, output_options);<\/span><\/span>\n<span class=\"line\"><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">    <\/span><span style=\"color: #569CD6\">const<\/span><span style=\"color: #D4D4D4\"> <\/span><span style=\"color: #569CD6\">int<\/span><span style=\"color: #D4D4D4\"> elems_per_in_tensor = <\/span><span style=\"color: #4EC9B0\">at<\/span><span style=\"color: #D4D4D4\">::<\/span><span style=\"color: #DCDCAA\">numel<\/span><span style=\"color: #D4D4D4\">(input_vals) \/ devices_per_node;<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">    <\/span><span style=\"color: #569CD6\">const<\/span><span style=\"color: #D4D4D4\"> <\/span><span style=\"color: #569CD6\">int<\/span><span style=\"color: #D4D4D4\"> elems_per_in_group = elems_per_in_tensor \/ (in_groups \/ devices_per_node);<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">    <\/span><span style=\"color: #569CD6\">const<\/span><span style=\"color: #D4D4D4\"> <\/span><span style=\"color: #569CD6\">int<\/span><span style=\"color: #D4D4D4\"> elems_per_out_group = elems_per_in_tensor \/ out_groups;<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p class=\"wp-block-paragraph\">\u3000<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u3000<\/p>\n\n\n\n<p class=\"has-black-color has-text-color has-link-color wp-elements-2c549cc4ec099235de32f6a4292b1df7 
wp-block-paragraph\"><strong>\u30fbDeepSpeed\/csrc\/transformer\/inference\/csrc\/pt_binding.cpp<\/strong><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">541-542\u884c\u76ee\u3092\u4e0b\u8a18\u306b\u4fee\u6b63<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#1E1E1E\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"\t\t\t\t\t\t\t\t\t {static_cast&lt;unsigned&gt;(hidden_dim * InferenceContext::Instance().GetMaxTokenLength()),\n\t\t\t\t\t\t\t\t\t  static_cast&lt;unsigned&gt;(k * InferenceContext::Instance().GetMaxTokenLength()),\" style=\"color:#D4D4D4;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 
002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki dark-plus\" style=\"background-color: #1E1E1E\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #D4D4D4\">\t\t\t\t\t\t\t\t\t {<\/span><span style=\"color: #569CD6\">static_cast<\/span><span style=\"color: #D4D4D4\">&lt;<\/span><span style=\"color: #569CD6\">unsigned<\/span><span style=\"color: #D4D4D4\">&gt;(hidden_dim * <\/span><span style=\"color: #4EC9B0\">InferenceContext<\/span><span style=\"color: #D4D4D4\">::<\/span><span style=\"color: #DCDCAA\">Instance<\/span><span style=\"color: #D4D4D4\">().<\/span><span style=\"color: #DCDCAA\">GetMaxTokenLength<\/span><span style=\"color: #D4D4D4\">()),<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">\t\t\t\t\t\t\t\t\t  <\/span><span style=\"color: #569CD6\">static_cast<\/span><span style=\"color: #D4D4D4\">&lt;<\/span><span style=\"color: #569CD6\">unsigned<\/span><span style=\"color: #D4D4D4\">&gt;(k * <\/span><span style=\"color: #4EC9B0\">InferenceContext<\/span><span style=\"color: #D4D4D4\">::<\/span><span style=\"color: #DCDCAA\">Instance<\/span><span style=\"color: #D4D4D4\">().<\/span><span style=\"color: #DCDCAA\">GetMaxTokenLength<\/span><span style=\"color: #D4D4D4\">()),<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p class=\"wp-block-paragraph\">550-551\u884c\u76ee\u3092\u4e0b\u8a18\u306b\u4fee\u6b63<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#1E1E1E\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 
14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"\t\t\t\t\t\t {static_cast&lt;unsigned&gt;(hidden_dim * InferenceContext::Instance().GetMaxTokenLength()),\n\t\t\t\t\t\t  static_cast&lt;unsigned&gt;(k * InferenceContext::Instance().GetMaxTokenLength()),\" style=\"color:#D4D4D4;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki dark-plus\" style=\"background-color: #1E1E1E\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #D4D4D4\">\t\t\t\t\t\t {<\/span><span style=\"color: #569CD6\">static_cast<\/span><span style=\"color: #D4D4D4\">&lt;<\/span><span style=\"color: #569CD6\">unsigned<\/span><span style=\"color: #D4D4D4\">&gt;(hidden_dim * <\/span><span style=\"color: #4EC9B0\">InferenceContext<\/span><span style=\"color: #D4D4D4\">::<\/span><span style=\"color: #DCDCAA\">Instance<\/span><span style=\"color: #D4D4D4\">().<\/span><span style=\"color: #DCDCAA\">GetMaxTokenLength<\/span><span style=\"color: 
#D4D4D4\">()),<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">\t\t\t\t\t\t  <\/span><span style=\"color: #569CD6\">static_cast<\/span><span style=\"color: #D4D4D4\">&lt;<\/span><span style=\"color: #569CD6\">unsigned<\/span><span style=\"color: #D4D4D4\">&gt;(k * <\/span><span style=\"color: #4EC9B0\">InferenceContext<\/span><span style=\"color: #D4D4D4\">::<\/span><span style=\"color: #DCDCAA\">Instance<\/span><span style=\"color: #D4D4D4\">().<\/span><span style=\"color: #DCDCAA\">GetMaxTokenLength<\/span><span style=\"color: #D4D4D4\">()),<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p class=\"wp-block-paragraph\">1581\u884c\u76ee\u3092\u4e0b\u8a18\u306b\u4fee\u6b63<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#1E1E1E\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"\t\tat::from_blob(intermediate_ptr, {input.size(0), input.size(1), static_cast&lt;int64_t&gt;(mlp_1_out_neurons)}, options);\" style=\"color:#D4D4D4;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" 
viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki dark-plus\" style=\"background-color: #1E1E1E\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #D4D4D4\">\t\t<\/span><span style=\"color: #4EC9B0\">at<\/span><span style=\"color: #D4D4D4\">::<\/span><span style=\"color: #DCDCAA\">from_blob<\/span><span style=\"color: #D4D4D4\">(intermediate_ptr, {<\/span><span style=\"color: #9CDCFE\">input<\/span><span style=\"color: #D4D4D4\">.<\/span><span style=\"color: #DCDCAA\">size<\/span><span style=\"color: #D4D4D4\">(<\/span><span style=\"color: #B5CEA8\">0<\/span><span style=\"color: #D4D4D4\">), <\/span><span style=\"color: #9CDCFE\">input<\/span><span style=\"color: #D4D4D4\">.<\/span><span style=\"color: #DCDCAA\">size<\/span><span style=\"color: #D4D4D4\">(<\/span><span style=\"color: #B5CEA8\">1<\/span><span style=\"color: #D4D4D4\">), <\/span><span style=\"color: #569CD6\">static_cast<\/span><span style=\"color: #D4D4D4\">&lt;<\/span><span style=\"color: #569CD6\">int64_t<\/span><span style=\"color: #D4D4D4\">&gt;(mlp_1_out_neurons)}, options);<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p class=\"wp-block-paragraph\">\u3000<\/p>\n\n\n\n<p class=\"has-black-color has-text-color has-link-color wp-elements-d8a49a2be96f83d8de843f31bf95fa15 wp-block-paragraph\"><strong>\u30fbDeepSpeed\/deepspeed\/env_report.py<\/strong><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">10\u884c\u76ee\u306b\u4e0b\u8a18\u3092\u8ffd\u52a0<\/p>\n\n\n\n<div 
class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#1E1E1E\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"import psutil\" style=\"color:#D4D4D4;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki dark-plus\" style=\"background-color: #1E1E1E\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #C586C0\">import<\/span><span style=\"color: #D4D4D4\"> psutil<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u00a083-100\u884c\u76ee\u306e\u95a2\u6570\u3092\u4e0b\u8a18\u3067\u7f6e\u304d\u63db\u3048(10\u884c\u76ee\u306b\u8ffd\u52a0\u3055\u308c\u3066\u3044\u308b\u306e\u3067\u3001\u884c\u6570\u30ba\u30ec\u306b\u6ce8\u610f)<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#1E1E1E\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"def get_shm_size():\n    try:\n        temp_dir = os.getenv('TEMP') or os.getenv('TMP') or os.path.join(os.path.expanduser('~'), 'tmp')\n        shm_stats = psutil.disk_usage(temp_dir)\n        shm_size = shm_stats.total\n        shm_hbytes = human_readable_size(shm_size)\n        warn = []\n        if shm_size &lt; 512 * 1024**2:\n            warn.append(\n                f&quot; {YELLOW} [WARNING] Shared memory size might be too small, consider increasing it. 
{END}&quot;\n            )\n            # Add additional warnings specific to your use case if needed.\n        return shm_hbytes, warn\n    except Exception as e:\n        return &quot;UNKNOWN&quot;, [f&quot;Error getting shared memory size: {e}&quot;]\" style=\"color:#D4D4D4;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki dark-plus\" style=\"background-color: #1E1E1E\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #569CD6\">def<\/span><span style=\"color: #D4D4D4\"> <\/span><span style=\"color: #DCDCAA\">get_shm_size<\/span><span style=\"color: #D4D4D4\">():<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">    <\/span><span style=\"color: #C586C0\">try<\/span><span style=\"color: #D4D4D4\">:<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">        temp_dir = os.getenv(<\/span><span style=\"color: #CE9178\">&#39;TEMP&#39;<\/span><span style=\"color: #D4D4D4\">) <\/span><span style=\"color: #569CD6\">or<\/span><span style=\"color: #D4D4D4\"> os.getenv(<\/span><span style=\"color: #CE9178\">&#39;TMP&#39;<\/span><span style=\"color: #D4D4D4\">) <\/span><span style=\"color: #569CD6\">or<\/span><span style=\"color: #D4D4D4\"> os.path.join(os.path.expanduser(<\/span><span style=\"color: #CE9178\">&#39;~&#39;<\/span><span style=\"color: #D4D4D4\">), 
<\/span><span style=\"color: #CE9178\">&#39;tmp&#39;<\/span><span style=\"color: #D4D4D4\">)<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">        shm_stats = psutil.disk_usage(temp_dir)<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">        shm_size = shm_stats.total<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">        shm_hbytes = human_readable_size(shm_size)<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">        warn = []<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">        <\/span><span style=\"color: #C586C0\">if<\/span><span style=\"color: #D4D4D4\"> shm_size &lt; <\/span><span style=\"color: #B5CEA8\">512<\/span><span style=\"color: #D4D4D4\"> * <\/span><span style=\"color: #B5CEA8\">1024<\/span><span style=\"color: #D4D4D4\">**<\/span><span style=\"color: #B5CEA8\">2<\/span><span style=\"color: #D4D4D4\">:<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">            warn.append(<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">                <\/span><span style=\"color: #569CD6\">f<\/span><span style=\"color: #CE9178\">&quot; <\/span><span style=\"color: #569CD6\">{<\/span><span style=\"color: #D4D4D4\">YELLOW<\/span><span style=\"color: #569CD6\">}<\/span><span style=\"color: #CE9178\"> [WARNING] Shared memory size might be too small, consider increasing it. 
<\/span><span style=\"color: #569CD6\">{<\/span><span style=\"color: #D4D4D4\">END<\/span><span style=\"color: #569CD6\">}<\/span><span style=\"color: #CE9178\">&quot;<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">            )<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">            <\/span><span style=\"color: #6A9955\"># Add additional warnings specific to your use case if needed.<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">        <\/span><span style=\"color: #C586C0\">return<\/span><span style=\"color: #D4D4D4\"> shm_hbytes, warn<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">    <\/span><span style=\"color: #C586C0\">except<\/span><span style=\"color: #D4D4D4\"> <\/span><span style=\"color: #4EC9B0\">Exception<\/span><span style=\"color: #D4D4D4\"> <\/span><span style=\"color: #C586C0\">as<\/span><span style=\"color: #D4D4D4\"> e:<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D4D4D4\">        <\/span><span style=\"color: #C586C0\">return<\/span><span style=\"color: #D4D4D4\"> <\/span><span style=\"color: #CE9178\">&quot;UNKNOWN&quot;<\/span><span style=\"color: #D4D4D4\">, [<\/span><span style=\"color: #569CD6\">f<\/span><span style=\"color: #CE9178\">&quot;Error getting shared memory size: <\/span><span style=\"color: #569CD6\">{<\/span><span style=\"color: #D4D4D4\">e<\/span><span style=\"color: #569CD6\">}<\/span><span style=\"color: #CE9178\">&quot;<\/span><span style=\"color: #D4D4D4\">]<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p class=\"wp-block-paragraph\">\u3000<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">4 : \u30d3\u30eb\u30c9\u5b9f\u884c<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u7ba1\u7406\u8005\u6a29\u9650\u3067\u30b3\u30de\u30f3\u30c9\u30d7\u30ed\u30f3\u30d7\u30c8\u3092\u958b\u304d\u3001build_win.bat\u3092\u5b9f\u884c\u3002(\u7d50\u69cb\u5f85\u3064)<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" 
decoding=\"async\" width=\"372\" height=\"18\" src=\"https:\/\/elosove.com\/wp-content\/uploads\/2024\/01\/build_log2.png\" alt=\"\" class=\"wp-image-531\" srcset=\"https:\/\/elosove.com\/wp-content\/uploads\/2024\/01\/build_log2.png 372w, https:\/\/elosove.com\/wp-content\/uploads\/2024\/01\/build_log2-300x15.png 300w\" sizes=\"auto, (max-width: 372px) 100vw, 372px\" \/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u3000<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">5 : install<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u30d3\u30eb\u30c9\u306b\u6210\u529f\u3059\u308b\u3068dist\u30d5\u30a9\u30eb\u30c0\u306bwhl\u30d5\u30a1\u30a4\u30eb\u304c\u751f\u6210\u3055\u308c\u308b\u306e\u3067\u3001pip install\u3059\u308b\u3002<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">cd dist<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">pip install deepspeed-0.11.2+f0604078-cp311-cp311-win_amd64.whl<\/p>\n<\/blockquote>\n\n\n\n<p class=\"wp-block-paragraph\">\u203b.whl\u306e\u30d5\u30a1\u30a4\u30eb\u540d\u306f\u74b0\u5883\u306b\u3088\u3063\u3066\u9055\u3044\u307e\u3059\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4eca\u56de\u306fDeepSpeed\u306ev0.11.2\u3092\u30d3\u30eb\u30c9&amp;\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u65b9\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002 \u30d3\u30eb\u30c9\u304c\u3081\u3093\u3069\u304f\u3055\u3044\u4eba\u306fv0.11.1\u306ewhl\u30d5\u30a1\u30a4\u30eb\u304c\u516c\u958b\u3055\u308c\u3066\u3044\u308b\u306e\u3067DL\u3057\u3066pip install\u3057\u3066\u304f\u3060\u3055\u3044\u3002[dee 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":526,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[19],"class_list":["post-528","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-tech","tag-deepspeed"],"_links":{"self":[{"href":"https:\/\/elosove.com\/index.php?rest_route=\/wp\/v2\/posts\/528","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/elosove.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/elosove.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/elosove.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/elosove.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=528"}],"version-history":[{"count":3,"href":"https:\/\/elosove.com\/index.php?rest_route=\/wp\/v2\/posts\/528\/revisions"}],"predecessor-version":[{"id":534,"href":"https:\/\/elosove.com\/index.php?rest_route=\/wp\/v2\/posts\/528\/revisions\/534"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/elosove.com\/index.php?rest_route=\/wp\/v2\/media\/526"}],"wp:attachment":[{"href":"https:\/\/elosove.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=528"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/elosove.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=528"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/elosove.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=528"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}