{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "A100" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "a0a81cb728564e88aee25e748b11133a": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_bd79f708cfcd4405a57fd09d310bd35a", "IPY_MODEL_a4ad4de496354084ae1e7b8647fde653", "IPY_MODEL_c7516cc979174de08e77cb797de2b7be" ], "layout": "IPY_MODEL_14a29f32e4af4f8184b26cb13a8c4767" } }, "bd79f708cfcd4405a57fd09d310bd35a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6755d6aa3dc74d11bff72997ca80ed50", "placeholder": "​", "style": "IPY_MODEL_07b80f4c2fdd4f8b8048b524d0e204e7", "value": "tokenizer_nanogpt.py: " } }, "a4ad4de496354084ae1e7b8647fde653": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_fdf4039acc38437687ecb0d91f4c067e", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_31e58c8aa3f942939e3f5807373d737a", "value": 1 } }, "c7516cc979174de08e77cb797de2b7be": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f0254ddfd4514f3f9f75093ad9f1801e", "placeholder": "​", "style": "IPY_MODEL_cfbc1c6a33554bfe82986782e8b1c1c1", "value": " 2.39k/? [00:00<00:00, 77.5kB/s]" } }, "14a29f32e4af4f8184b26cb13a8c4767": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6755d6aa3dc74d11bff72997ca80ed50": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "07b80f4c2fdd4f8b8048b524d0e204e7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "fdf4039acc38437687ecb0d91f4c067e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "31e58c8aa3f942939e3f5807373d737a": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "f0254ddfd4514f3f9f75093ad9f1801e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cfbc1c6a33554bfe82986782e8b1c1c1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f5a13e3fe6624fe7928e7656868de5a5": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_aa7aa11be8a14489893928fa5542098f", "IPY_MODEL_7731ce779e2343ed8dfb75d91fff85be", "IPY_MODEL_2932c2f5bf634d8385a45302f475aa97" ], "layout": "IPY_MODEL_fc5d32bdd43e41c6860c44cfddbd2d61" } }, "aa7aa11be8a14489893928fa5542098f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_66d77906d42449e090c3f0e5912793d2", "placeholder": "​", "style": "IPY_MODEL_c5c2d00b9b3f447e957b934fccf7db42", "value": "tokenizer.pkl: 100%" } }, "7731ce779e2343ed8dfb75d91fff85be": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5afeebb4d064422786d575ee7d1b6226", "max": 846518, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9df83a314c1a4bc3a9c39cd63ca6f182", "value": 846518 } }, "2932c2f5bf634d8385a45302f475aa97": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_27546e720b2d4da29a272c8bb90fb351", "placeholder": "​", "style": "IPY_MODEL_14a3e3a938c5417f9727716bbb7d1564", "value": " 847k/847k [00:02<00:00, 285kB/s]" } }, "fc5d32bdd43e41c6860c44cfddbd2d61": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "66d77906d42449e090c3f0e5912793d2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c5c2d00b9b3f447e957b934fccf7db42": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5afeebb4d064422786d575ee7d1b6226": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9df83a314c1a4bc3a9c39cd63ca6f182": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "27546e720b2d4da29a272c8bb90fb351": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "14a3e3a938c5417f9727716bbb7d1564": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YCQ2SpNSFKkj", "outputId": "cb463fd3-d412-4e05-b67e-f001c17b58b0" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "A new version of the following files was downloaded from https://huggingface.co/nanochat-students/base-d20:\n", "- configuration_nanogpt.py\n", ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n", "A new version of the following files was downloaded from https://huggingface.co/nanochat-students/base-d20:\n", "- modeling_nanogpt.py\n", ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "NanoGPTModel(\n", " (transformer): ModuleDict(\n", " (wte): Embedding(65536, 1280)\n", " (h): ModuleList(\n", " (0-19): 20 x Block(\n", " (attn): CausalSelfAttention(\n", " (c_q): Linear(in_features=1280, out_features=1280, bias=False)\n", " (c_k): Linear(in_features=1280, out_features=1280, bias=False)\n", " (c_v): Linear(in_features=1280, out_features=1280, bias=False)\n", " (c_proj): Linear(in_features=1280, out_features=1280, bias=False)\n", " )\n", " (mlp): MLP(\n", " (c_fc): Linear(in_features=1280, out_features=5120, bias=False)\n", " (c_proj): Linear(in_features=5120, out_features=1280, bias=False)\n", " )\n", " )\n", " )\n", " )\n", " (lm_head): Linear(in_features=1280, out_features=65536, bias=False)\n", ")" ] }, "metadata": {}, "execution_count": 4 } ], "source": [ "from transformers import AutoConfig, AutoModel, AutoTokenizer\n", "import torch\n", "\n", "model_dir = \"nanochat-students/base-d20\"\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "model = AutoModel.from_pretrained(model_dir, trust_remote_code=True)\n", "model = model.to(device)\n", "model.eval()" ] }, { "cell_type": "code", "source": [ "tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 133, "referenced_widgets": [ "a0a81cb728564e88aee25e748b11133a", "bd79f708cfcd4405a57fd09d310bd35a", "a4ad4de496354084ae1e7b8647fde653", "c7516cc979174de08e77cb797de2b7be", "14a29f32e4af4f8184b26cb13a8c4767", "6755d6aa3dc74d11bff72997ca80ed50", "07b80f4c2fdd4f8b8048b524d0e204e7", "fdf4039acc38437687ecb0d91f4c067e", "31e58c8aa3f942939e3f5807373d737a", "f0254ddfd4514f3f9f75093ad9f1801e", "cfbc1c6a33554bfe82986782e8b1c1c1", "f5a13e3fe6624fe7928e7656868de5a5", "aa7aa11be8a14489893928fa5542098f", "7731ce779e2343ed8dfb75d91fff85be", "2932c2f5bf634d8385a45302f475aa97", "fc5d32bdd43e41c6860c44cfddbd2d61", "66d77906d42449e090c3f0e5912793d2", "c5c2d00b9b3f447e957b934fccf7db42", "5afeebb4d064422786d575ee7d1b6226", "9df83a314c1a4bc3a9c39cd63ca6f182", "27546e720b2d4da29a272c8bb90fb351", "14a3e3a938c5417f9727716bbb7d1564" ] }, "id": "i6AWzw0-Hl6j", "outputId": "5069f13e-3162-4a5e-96af-1ed1712eef69" }, "execution_count": 2, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "tokenizer_nanogpt.py: 0.00B [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "a0a81cb728564e88aee25e748b11133a" } }, "metadata": {} }, { "output_type": "stream", "name": "stderr", "text": [ "A new version of the following files was downloaded from https://huggingface.co/nanochat-students/base-d20:\n", "- tokenizer_nanogpt.py\n", ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "tokenizer.pkl: 0%| | 0.00/847k [00:00The capital of Belgium is 11,000 square kilometers and is located in the north of the country. The capital of Belgium is Brussels. The capital of Belgium is located in the north of the country. The capital of Belgium is located in the north of the country. The\n" ] } ] } ] }