diff --git "a/scripts/scratch.ipynb" "b/scripts/scratch.ipynb" --- "a/scripts/scratch.ipynb" +++ "b/scripts/scratch.ipynb" @@ -621,10 +621,131 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "ae045873", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/lib/python3/dist-packages/sklearn/utils/fixes.py:25: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", + " from pkg_resources import parse_version # type: ignore\n", + "2025-11-23 08:38:00.662486: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2025-11-23 08:38:00.676959: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "E0000 00:00:1763887080.694448 2395032 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "E0000 00:00:1763887080.700029 2395032 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "W0000 00:00:1763887080.713829 2395032 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "W0000 00:00:1763887080.713845 2395032 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "W0000 00:00:1763887080.713847 2395032 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "W0000 00:00:1763887080.713849 2395032 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", + "2025-11-23 08:38:00.718634: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'MessageFactory' object has no attribute 'GetPrototype'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;31mAttributeError\u001b[0m: 'MessageFactory' object has no attribute 'GetPrototype'" + ] + }, + { + "ename": "AttributeError", + "evalue": "'MessageFactory' object has no attribute 'GetPrototype'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;31mAttributeError\u001b[0m: 'MessageFactory' object has no attribute 'GetPrototype'" + ] + }, + { + "ename": "AttributeError", + "evalue": "'MessageFactory' object has no attribute 'GetPrototype'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;31mAttributeError\u001b[0m: 'MessageFactory' object has no attribute 'GetPrototype'" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/.local/lib/python3.10/site-packages/google/api_core/_python_version_support.py:266: FutureWarning: You are using a Python version (3.10.12) which Google will stop supporting in new releases of google.api_core once it reaches its end of life (2026-10-04). Please upgrade to the latest Python version, or at least Python 3.11, to continue receiving updates for google.api_core past that date.\n", + " warnings.warn(message, FutureWarning)\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'MessageFactory' object has no attribute 'GetPrototype'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;31mAttributeError\u001b[0m: 'MessageFactory' object has no attribute 'GetPrototype'" + ] + }, + { + "ename": "AttributeError", + "evalue": "'MessageFactory' object has no attribute 'GetPrototype'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;31mAttributeError\u001b[0m: 'MessageFactory' object has no attribute 'GetPrototype'" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Skipping import of cpp extensions due to incompatible torch version 2.9.1+cu128 for torchao version 0.14.1 Please see https://github.com/pytorch/ao/issues/2919 for more info\n", + "TMA benchmarks will be running without grid constant TMA descriptor.\n", + "WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda128.so')\n", + "ERROR:bitsandbytes.cextension:Could not load bitsandbytes native library: /lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so)\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.10/dist-packages/bitsandbytes/cextension.py\", line 85, in \n", + " lib = get_native_library()\n", + " File \"/usr/local/lib/python3.10/dist-packages/bitsandbytes/cextension.py\", line 72, in get_native_library\n", + " dll = ct.cdll.LoadLibrary(str(binary_path))\n", + " File \"/usr/lib/python3.10/ctypes/__init__.py\", line 452, in LoadLibrary\n", + " return self._dlltype(name)\n", + " File \"/usr/lib/python3.10/ctypes/__init__.py\", line 374, in __init__\n", + " self._handle = _dlopen(self._name, mode)\n", + "OSError: /lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so)\n", + "WARNING:bitsandbytes.cextension:\n", + "CUDA Setup failed despite CUDA being available. Please run the following command to get more information:\n", + "\n", + "python -m bitsandbytes\n", + "\n", + "Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them\n", + "to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes\n", + "and open an issue at: https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n", + "\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8afb02201d53428282ee2fc85c9822dd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 7 files: 0%| | 0/7 [00:00]\n", + "{'input_ids': tensor([[151644, 8948, 198, 74785, 279, 1376, 4419, 315, 279,\n", + " 1946, 2168, 320, 3423, 11, 6083, 11, 1379, 11,\n", + " 10434, 11, 6171, 11, 4004, 701, 1221, 10339, 1246,\n", + " 279, 1196, 594, 1467, 7600, 1265, 11596, 476, 5602,\n", + " 279, 2168, 13, 19813, 264, 501, 2168, 429, 20027,\n", + " 279, 1196, 594, 8502, 1393, 20337, 28137, 448, 279,\n", + " 4024, 1946, 1380, 8311, 13, 151645, 198, 151644, 872,\n", + " 198, 24669, 220, 16, 25, 220, 151652, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655, 151655,\n", + " 151655, 151655, 151655, 151655, 151655, 151653, 44063, 105995, 69041,\n", + " 77559, 108215, 24, 15, 26381, 49720, 279, 6249, 220,\n", + " 24, 15, 12348, 311, 279, 2115, 13, 58230, 228,\n", + " 107620, 106438, 100462, 121751, 107372, 11999, 279, 6249, 311,\n", + " 264, 11958, 594, 46697, 1651, 13, 220, 58230, 228,\n", + " 105995, 46670, 17714, 80942, 63836, 105995, 11999, 279, 6249,\n", + " 311, 264, 6884, 34381, 18342, 13, 151645, 198, 151644,\n", + " 77091, 198]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1]], device='cuda:0'), 'pixel_values': tensor([[-1.7631, -1.7631, -1.7485, ..., -0.9114, -0.9114, -0.9114],\n", + " [-1.7631, -1.7485, -1.7631, ..., -0.5559, -0.5275, -0.5275],\n", + " [-1.7631, -1.7485, -1.7631, ..., -1.3096, -1.3096, -1.3096],\n", + " ...,\n", + " [-1.6901, -1.7193, -1.7193, ..., -0.5701, -0.5133, -0.4990],\n", + " [-1.6755, -1.6317, -1.6463, ..., -0.5986, -0.6270, -0.6981],\n", + " [-1.6901, -1.6901, -1.6755, ..., -0.6555, -0.6412, -0.6128]],\n", + " device='cuda:0'), 'image_grid_thw': tensor([[ 1, 22, 34]], device='cuda:0')}\n", + "_get_qwen_prompt_embeds, model_inputs.pixel_values\n", + "Shape: (748, 1176)\n", + "Min: -1.7922625541687012, Max: 2.145897150039673, Mean: -0.7493594884872437\n", + "Device: cuda:0, Dtype: torch.float32, Requires Grad: False\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Time taken by QwenImageEditPlusPipeline.encode_prompt: 0.427993256947957 seconds\n", - "Time taken by Encode Prompt: 0.42811784299556166 seconds\n", - "Time taken by QwenImageEditPlusPipeline._encode_vae_image: 0.07400305196642876 seconds\n", + "encode_prompt, prompt_embeds\n", + "Shape: (1, 253, 3584)\n", + "Min: -167.0, Max: 126.0, Mean: -0.1279296875\n", + "Device: cuda:0, Dtype: torch.bfloat16, Requires Grad: False\n", + "Time taken by QwenImageEditPlusPipeline.encode_prompt: 0.6033668370218948 seconds\n", + "Time taken by Encode Prompt: 0.6034080770332366 seconds\n", + "_encode_vae_image, image\n", + "Shape: (1, 3, 1, 416, 640)\n", + "Min: -1.0, Max: 1.0, Mean: -0.50390625\n", + "Device: cuda:0, Dtype: torch.bfloat16, Requires Grad: False\n", + "_encode_vae_image, image_latents\n", + "Shape: (1, 16, 1, 52, 80)\n", + "Min: -1.734375, Max: 1.796875, Mean: -0.04541015625\n", + "Device: cuda:0, Dtype: torch.bfloat16, Requires Grad: False\n", + "Time taken by QwenImageEditPlusPipeline._encode_vae_image: 0.22572305297944695 seconds\n", "image_seq_len=4056\n", "mu=0.6915322580645161\n", - "timesteps=tensor([1000.0000, 766.4941, 455.3210, 20.0000], device='cuda:0')\n", - "Time taken by Prep gen: 0.10688352002762258 seconds\n" + "timesteps=tensor([1000.0000, 980.8338, 960.9587, 940.3351, 918.9197, 896.6658,\n", + " 873.5234, 849.4380, 824.3512, 798.1988, 770.9116, 742.4141,\n", + " 712.6240, 681.4511, 648.7971, 614.5536, 578.6018, 540.8107,\n", + " 501.0352, 459.1150, 414.8718, 368.1068, 318.5984, 266.0976,\n", + " 210.3248, 150.9643, 87.6583, 20.0000], device='cuda:0')\n", + "Time taken by Prep gen: 0.2313395309029147 seconds\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "296e7f68f6e24524be0a2f6d58020b3e", + "model_id": "a741c1cc9fa3431daa7126401712cf01", "version_major": 2, "version_minor": 0 }, "text/plain": [ - " 0%| | 0/4 [00:00]" + "" ] }, - "execution_count": 11, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "foundation.base_pipe(QwenInputs(**inps[0]))" + "inps = QwenInputs(**inps[0])\n", + "inps.num_inference_steps = 28\n", + "outs = foundation.base_pipe(inps)\n", + "outs[0]" ] }, {