Tyler Suard
Published © CC BY-NC

Train a 65,536 Context Length LLM on A Single GPU in Colab

Good Lord, that's a lotta context!

Beginner · Work in progress · 1 hour · 174
Train a 65,536 Context Length LLM on A Single GPU in Colab

Things used in this project

Software apps and online services

Google Colab
pytorch

Story

Read more

Schematics

Architecture Diagram

Code

Attention Trees Colab Notebook

Python
You may NOT use this for commercial purposes without my permission. If you want to use it for non-commercial research then attribute me please.
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "d3sE2Sk7dGi-",
        "outputId": "73fb65d0-4cb2-434e-da04-6d9168181562"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting transformers\n",
            "  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)\n",
            "\u001b[2K     \u001b[90m\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m50.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n",
            "Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)\n",
            "  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n",
            "\u001b[2K     \u001b[90m\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m23.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n",
            "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)\n",
            "  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n",
            "\u001b[2K     \u001b[90m\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m97.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n",
            "  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
            "\u001b[2K     \u001b[90m\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m68.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n",
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n",
            "Installing collected packages: tokenizers, safetensors, huggingface-hub, transformers\n",
            "Successfully installed huggingface-hub-0.16.4 safetensors-0.3.1 tokenizers-0.13.3 transformers-4.31.0\n"
          ]
        }
      ],
      "source": [
        "# Pin the version actually used when this notebook was run (see install log below),\n",
        "# and use %pip so the install targets this kernel's environment.\n",
        "%pip install transformers==4.31.0"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import drive\n",
        "# Mount Google Drive so tensors saved below persist across Colab sessions.\n",
        "drive.mount('/content/drive')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Np8kYsbw6Njn",
        "outputId": "fde4a33d-4044-4dc7-92d3-20b0f2c241f7"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mounted at /content/drive\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "kKlVPLKB4pnw",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "92d1e268-dca0-4b97-aaec-47b75613b1a7"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "torch.Size([1000])\n"
          ]
        }
      ],
      "source": [
        "import random\n",
        "import torch\n",
        "\n",
        "# Build a dict shaped like a tokenizer's output ('input_ids', 'token_type_ids',\n",
        "# 'attention_mask') filled with random token ids -- synthetic training data.\n",
        "def make_random_input_ids(length, num_records):\n",
        "  dataset_list = []\n",
        "  for record in range(num_records):\n",
        "    # one record = `length` random token ids drawn from [0, 2000]\n",
        "    record_ids = [random.randint(0,2000) for i in range(length)]\n",
        "    dataset_list.append(record_ids)\n",
        "  dataset_dict = {'input_ids': torch.tensor(dataset_list)}\n",
        "  # NOTE(review): input_ids is (num_records, length) but the two tensors below are\n",
        "  # 1-D (length,) -- presumably one shared mask for all records (the shape prints\n",
        "  # later in the notebook confirm this layout); verify downstream consumers expect it.\n",
        "  dataset_dict['token_type_ids'] = torch.zeros(length, dtype=torch.long)\n",
        "  dataset_dict['attention_mask'] = torch.ones(length, dtype=torch.long)\n",
        "  return dataset_dict\n",
        "\n",
        "# Random binary labels (0 or 1), one per record.\n",
        "def make_random_labels(num_records):\n",
        "  labels_list = []\n",
        "  for record in range(num_records):\n",
        "    label = random.randint(0,1)\n",
        "    labels_list.append(label)\n",
        "  return torch.tensor(labels_list)\n",
        "\n",
        "# Convenience wrapper: returns (encoded_batch dict, labels tensor).\n",
        "def make_random_input_ids_and_labels(length, num_records):\n",
        "  return make_random_input_ids(length, num_records), make_random_labels(num_records)\n",
        "\n",
        "\n",
        "# 1000 synthetic records of 65,536 tokens each (~0.5 GB of int64 ids).\n",
        "encoded_batch, labels = make_random_input_ids_and_labels(65536, 1000)\n",
        "# print(encoded_batch.size())\n",
        "print(labels.size())\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Persist the synthetic dataset to Drive so later sessions can skip regeneration.\n",
        "torch.save(encoded_batch, '/content/drive/MyDrive/encoded_batch_random_65536.pt')\n",
        "# labels is already a tensor -- re-wrapping it with torch.tensor() makes a needless\n",
        "# copy and triggers the UserWarning seen in this cell's output; save it directly.\n",
        "torch.save(labels, '/content/drive/MyDrive/random_labels_65536.pt')\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "R1AbSpTxCX3B",
        "outputId": "b919fb92-6c93-454d-d78f-83af5dd8040b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-5-664769eaf451>:2: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  torch.save(torch.tensor(labels), '/content/drive/MyDrive/random_labels_65536.pt')\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "IOBMCG_jraTz"
      },
      "outputs": [],
      "source": [
        "import torch\n",
        "\n",
        "# Load the tensors from a file\n",
        "# (re-entry point: lets a fresh session reuse the dataset saved above instead of\n",
        "# regenerating the 1000 x 65,536 batch).\n",
        "encoded_batch = torch.load('/content/drive/MyDrive/encoded_batch_random_65536.pt')\n",
        "labels = torch.load('/content/drive/MyDrive/random_labels_65536.pt')\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "lJ2JpfIJipfe",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "99894f2d-151b-444b-9570-6c162b43b6f6"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "torch.Size([1000, 65536])\n",
            "torch.Size([65536])\n",
            "torch.Size([65536])\n"
          ]
        }
      ],
      "source": [
        "# Sanity-check shapes: input_ids is (num_records, length); the two masks are 1-D.\n",
        "print(encoded_batch['input_ids'].size())\n",
        "print(encoded_batch['token_type_ids'].size())\n",
        "print(encoded_batch['attention_mask'].size())"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "NgJZtUKWcuhD",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "07761fab-6b3c-49d1-a272-87a8fe70369a"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'input_ids': tensor([[1197, 1605,   72,  ..., 1314,  181,   52],\n",
            "        [1085,  762, 1872,  ...,  197, 1829,  488],\n",
            "        [1381, 1159, 1768,  ..., 1044,  677,  585],\n",
            "        ...,\n",
            "        [ 488, 1118, 1872,  ...,  142, 1122, 1524],\n",
            "        [1507,  335, 1308,  ...,  831,  521,  515],\n",
            "        [ 406,  401, 1618,  ...,  873, 1734,  191]]), 'token_type_ids': tensor([0, 0, 0,  ..., 0, 0, 0]), 'attention_mask': tensor([1, 1, 1,  ..., 1, 1, 1])}\n",
            "tensor([0, 0, 1,  ..., 1, 0, 1])\n"
          ]
        }
      ],
      "source": [
        "# Eyeball the raw values; torch elides the middle of large tensors in the repr,\n",
        "# so this stays readable despite the 1000 x 65,536 batch.\n",
        "print(encoded_batch)\n",
        "print(labels)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zr4APWxsBwQA"
      },
      "source": [
        "## This is a transformer that takes in several sequences, and outputs a single vector summarizing each sequence.  My idea is to try and map that single vector out in the same embedding space as the tokens, and to try and find single tokens that summarize each sequence.  "
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "BX4PCgh77ROt"
      },
      "outputs": [],
      "source": [
        "import math\n",
        "import torch\n",
        "import torch.nn as nn\n",
        "\n",
        "# Classic sinusoidal positional encoding.\n",
        "# Fix: this cell uses `torch` and `math` but previously imported neither -- `math`\n",
        "# was only imported by a LATER cell, so the cell was not self-contained under\n",
        "# Restart & Run All. Importing both here makes it order-independent.\n",
        "class PositionalEncoding(nn.Module):\n",
        "    def __init__(self, d_model, dropout=0.1, max_len=4090):\n",
        "        super(PositionalEncoding, self).__init__()\n",
        "        self.dropout = nn.Dropout(p=dropout)\n",
        "\n",
        "        # Precompute a (max_len, 1, d_model) table of interleaved sin/cos encodings.\n",
        "        pe = torch.zeros(max_len, d_model)\n",
        "        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)\n",
        "        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))\n",
        "        pe[:, 0::2] = torch.sin(position * div_term)\n",
        "        pe[:, 1::2] = torch.cos(position * div_term)\n",
        "        pe = pe.unsqueeze(0).transpose(0, 1)\n",
        "        # Buffer: moves with .to(device) and is saved in state_dict, but not trained.\n",
        "        self.register_buffer('pe', pe)\n",
        "\n",
        "    def forward(self, x):\n",
        "        # NOTE(review): pe is sliced by x.size(0), the FIRST dim, which assumes\n",
        "        # sequence-first (seq, batch, d_model) input. The models below feed\n",
        "        # batch-first tensors, so positions effectively index the batch dim --\n",
        "        # confirm this is intended before reusing this module elsewhere.\n",
        "        x = x + self.pe[:x.size(0), :]\n",
        "        return self.dropout(x)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "H_0kxvFQ_vId"
      },
      "outputs": [],
      "source": [
        "import torch\n",
        "import torch.nn as nn\n",
        "import math\n",
        "\n",
        "# Level 1: embeds raw token ids, adds positional encodings, runs a transformer\n",
        "# encoder, and mean-pools over dim=1 to produce one summary vector per input.\n",
        "class Level1Model(nn.Module):\n",
        "    def __init__(self, vocab_size, embedding_size, nhead, hidden_dim, layers, dropout=0.5):\n",
        "        super(Level1Model, self).__init__()\n",
        "        self.embedding = nn.Embedding(vocab_size, embedding_size)\n",
        "        self.pos_encoder = PositionalEncoding(embedding_size)\n",
        "        encoder_layer = nn.TransformerEncoderLayer(embedding_size, nhead, hidden_dim, dropout)\n",
        "        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, layers)\n",
        "\n",
        "    def forward(self, src):\n",
        "        embedded = self.embedding(src)\n",
        "        embedded = self.pos_encoder(embedded)\n",
        "        output = self.transformer_encoder(embedded)\n",
        "        output = output.mean(dim=1)\n",
        "        return output\n",
        "\n",
        "# Levels 2 and 3 take already-embedded summary vectors (no embedding layer).\n",
        "# NOTE(review): vocab_size is accepted but unused in Levels 2/3/5 -- kept only so\n",
        "# every level shares the same constructor signature.\n",
        "class Level2Model(nn.Module):\n",
        "  def __init__(self, vocab_size, embedding_size, nhead, hidden_dim, layers, dropout=0.5):\n",
        "    super(Level2Model, self).__init__()\n",
        "    self.pos_encoder = PositionalEncoding(embedding_size)\n",
        "    encoder_layer = nn.TransformerEncoderLayer(embedding_size, nhead, hidden_dim, dropout)\n",
        "    self.transformer_encoder = nn.TransformerEncoder(encoder_layer, layers)\n",
        "\n",
        "  def forward(self, src):\n",
        "      embedded = self.pos_encoder(src)\n",
        "      output = self.transformer_encoder(embedded)\n",
        "      output = output.mean(dim=1)\n",
        "      return output\n",
        "\n",
        "# Currently identical to Level2Model; kept as a separate class so each tree level\n",
        "# can be specialized independently later.\n",
        "class Level3Model(nn.Module):\n",
        "  def __init__(self, vocab_size, embedding_size, nhead, hidden_dim, layers, dropout=0.5):\n",
        "    super(Level3Model, self).__init__()\n",
        "    self.pos_encoder = PositionalEncoding(embedding_size)\n",
        "    encoder_layer = nn.TransformerEncoderLayer(embedding_size, nhead, hidden_dim, dropout)\n",
        "    self.transformer_encoder = nn.TransformerEncoder(encoder_layer, layers)\n",
        "\n",
        "  def forward(self, src):\n",
        "      embedded = self.pos_encoder(src)\n",
        "      output = self.transformer_encoder(embedded)\n",
        "      output = output.mean(dim=1)\n",
        "      return output\n",
        "\n",
        "# Top of the tree: same encoder stack plus a linear head producing one logit for\n",
        "# binary classification. (There is no Level4Model in this notebook.)\n",
        "class Level5Model(nn.Module):\n",
        "  def __init__(self, vocab_size, embedding_size, nhead, hidden_dim, layers, dropout=0.5):\n",
        "    super(Level5Model, self).__init__()\n",
        "    self.pos_encoder = PositionalEncoding(embedding_size)\n",
        "    encoder_layer = nn.TransformerEncoderLayer(embedding_size, nhead, hidden_dim, dropout)\n",
        "    self.transformer_encoder = nn.TransformerEncoder(encoder_layer, layers)\n",
        "    self.classifier = nn.Linear(embedding_size, 1)\n",
        "\n",
        "  def forward(self, src):\n",
        "    embedded = self.pos_encoder(src)\n",
        "    output = self.transformer_encoder(embedded)\n",
        "    output = output.mean(dim=1)\n",
        "    output = self.classifier(output)\n",
        "    return output\n",
        "\n",
        "\n",
        "\n",
        "vocab_size = 30522  # This is the size of BERT's vocabulary\n",
        "hidden_dim = 768  # This is the dimension of BERT's hidden layers\n",
        "nheads = 12  # The number of heads in multi-headed attention models\n",
        "num_layers = 12  # The number of transformer layers\n",
        "dropout = 0.1  # The dropout rate\n",
        "\n",
        "# Instantiate the model\n",
        "model1 = Level1Model(vocab_size, hidden_dim, nheads, hidden_dim, num_layers, dropout)\n",
        "\n",
        "# If you want to move the model to GPU (if available)\n",
        "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
        "model1 = model1.to(device)\n",
        "\n",
        "\n",
        "model2 = Level2Model(vocab_size, hidden_dim, nheads, hidden_dim, num_layers, dropout)\n",
        "\n",
        "# If you want to move the model to GPU (if available)\n",
        "model2 = model2.to(device)\n",
        "\n",
        "# Fix: model3 was previously built from Level2Model; Level3Model is the intended\n",
        "# class (the two are currently identical, so numeric behavior is unchanged).\n",
        "model3 = Level3Model(vocab_size, hidden_dim, nheads, hidden_dim, num_layers, dropout)\n",
        "\n",
        "# If you want to move the model to GPU (if available)\n",
        "model3 = model3.to(device)\n",
        "\n",
        "\n",
        "model4 = Level5Model(vocab_size, hidden_dim, nheads, hidden_dim, num_layers, dropout)\n",
        "\n",
        "# If you want to move the model to GPU (if available)\n",
        "model4 = model4.to(device)\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "wLs1bIYgCBkZ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "db51f7ab-6ca7-43d2-c0e5-7e9f497ce3b0"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch #: 0\n",
            "Epoch: 0, Sequence: 0\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-8-6dc111533c21>:32: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  label = torch.tensor(label)\n",
            "<ipython-input-8-6dc111533c21>:33: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  encoded_chunk = torch.tensor(encoded_chunk)\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "16 summary vectors received from level 3, sending to level 4...\n",
            "calculating loss...\n",
            "tensor(1.1518, device='cuda:0',\n",
            "       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)\n",
            "propagating loss backward through NN...\n",
            "Epoch: 0, Sequence: 1\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "16 summary vectors received from level 3, sending to level 4...\n",
            "calculating loss...\n",
            "tensor(9.6116, device='cuda:0',\n",
            "       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)\n",
            "propagating loss backward through NN...\n",
            "Epoch: 0, Sequence: 2\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 1 Summarized 256 tokens into 16 vectors\n",
            "Level 2 summarized 4096 tokens into 16 vectors\n",
...

This file has been truncated; please download it to see its full contents.

Credits

Tyler Suard
17 projects • 18 followers
Artificial Intelligence Specialist
Thanks to ChatGPT.

Comments