Skip to content

Commit 7dff2ed

Browse files
authored
Merge branch 'main' into swap
2 parents 8102565 + 2c231da commit 7dff2ed

File tree

9 files changed

+65
-21
lines changed

9 files changed

+65
-21
lines changed

.ci/docker/Dockerfile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ ENV DEBIAN_FRONTEND noninteractive
77
COPY ./common/install_base.sh install_base.sh
88
RUN bash ./install_base.sh && rm install_base.sh
99

10+
# Setup user
11+
# TODO: figure out how to remove this part
12+
COPY ./common/install_user.sh install_user.sh
13+
RUN bash ./install_user.sh && rm install_user.sh
14+
1015
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
1116
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
1217

@@ -20,4 +25,5 @@ COPY ./common/install_conda.sh install_conda.sh
2025
COPY ./common/common_utils.sh common_utils.sh
2126
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements.txt
2227

28+
USER ci-user
2329
CMD ["bash"]

.ci/docker/common/common_utils.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ as_ci_user() {
77
# NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
88
# NB: This must be run from a directory that the user has access to,
99
# works around https://github.com/conda/conda-package-handling/pull/34
10-
sudo -E -H env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" $*
10+
sudo -E -H -u ci-user env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" $*
1111
}
1212

1313
conda_install() {

.ci/docker/common/install_conda.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
1212
CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
1313

1414
mkdir -p /opt/conda
15+
chown ci-user:ci-user /opt/conda
1516

1617
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
1718

.ci/docker/common/install_user.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
# Copied from https://github.com/pytorch/executorch/blob/6e431355a554e5f84c3a05dfa2b981ead90c2b48/.ci/docker/common/install_user.sh#L1
9+
10+
set -ex
11+
12+
# Same as ec2-user
13+
echo "ci-user:x:1000:1000::/var/lib/ci-user:" >> /etc/passwd
14+
echo "ci-user:x:1000:" >> /etc/group
15+
# Needed on Focal or newer
16+
echo "ci-user:*:19110:0:99999:7:::" >> /etc/shadow
17+
18+
# Create $HOME
19+
mkdir -p /var/lib/ci-user
20+
chown ci-user:ci-user /var/lib/ci-user
21+
22+
# Allow sudo
23+
echo 'ci-user ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ci-user
24+
25+
# Test that sudo works
26+
sudo -u ci-user sudo -v

.ci/docker/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
sphinx==5.0.0
55
sphinx-gallery==0.11.1
66
sphinx_design
7-
nbsphinx
87
docutils==0.16
98
sphinx-copybutton
109
pypandoc==1.12

.github/workflows/build-tutorials.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ jobs:
9393
"${DOCKER_IMAGE}"
9494
)
9595
96-
docker exec -t "${container_name}" sh -c ".jenkins/build.sh"
96+
docker exec -u ci-user -t "${container_name}" sh -c ".jenkins/build.sh"
9797
9898
- name: Teardown Linux
9999
uses: pytorch/test-infra/.github/actions/teardown-linux@main
@@ -162,9 +162,7 @@ jobs:
162162
"${DOCKER_IMAGE}"
163163
)
164164
165-
docker exec -u root -i "${container_name}" bash
166-
167-
docker exec -t "${container_name}" sh -c ".jenkins/build.sh"
165+
docker exec -u ci-user -t "${container_name}" sh -c ".jenkins/build.sh"
168166
169167
- name: Upload docs preview
170168
uses: seemethere/upload-artifact-s3@v5

beginner_source/blitz/neural_networks_tutorial.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,16 +55,33 @@ def __init__(self):
5555
self.fc2 = nn.Linear(120, 84)
5656
self.fc3 = nn.Linear(84, 10)
5757

58-
def forward(self, x):
59-
# Max pooling over a (2, 2) window
60-
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
61-
# If the size is a square, you can specify with a single number
62-
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
63-
x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
64-
x = F.relu(self.fc1(x))
65-
x = F.relu(self.fc2(x))
66-
x = self.fc3(x)
67-
return x
58+
def forward(self, input):
59+
# Convolution layer C1: 1 input image channel, 6 output channels,
60+
# 5x5 square convolution, it uses RELU activation function, and
61+
# outputs a Tensor with size (N, 6, 28, 28), where N is the size of the batch
62+
c1 = F.relu(self.conv1(input))
63+
# Subsampling layer S2: 2x2 grid, purely functional,
64+
# this layer does not have any parameter, and outputs a (N, 6, 14, 14) Tensor
65+
s2 = F.max_pool2d(c1, (2, 2))
66+
# Convolution layer C3: 6 input channels, 16 output channels,
67+
# 5x5 square convolution, it uses RELU activation function, and
68+
# outputs a (N, 16, 10, 10) Tensor
69+
c3 = F.relu(self.conv2(s2))
70+
# Subsampling layer S4: 2x2 grid, purely functional,
71+
# this layer does not have any parameter, and outputs a (N, 16, 5, 5) Tensor
72+
s4 = F.max_pool2d(c3, 2)
73+
# Flatten operation: purely functional, outputs a (N, 400) Tensor
74+
s4 = torch.flatten(s4, 1)
75+
# Fully connected layer F5: (N, 400) Tensor input,
76+
# and outputs a (N, 120) Tensor, it uses RELU activation function
77+
f5 = F.relu(self.fc1(s4))
78+
# Fully connected layer F6: (N, 120) Tensor input,
79+
# and outputs a (N, 84) Tensor, it uses RELU activation function
80+
f6 = F.relu(self.fc2(f5))
81+
# Gaussian layer OUTPUT: (N, 84) Tensor input, and
82+
# outputs a (N, 10) Tensor
83+
output = self.fc3(f6)
84+
return output
6885

6986

7087
net = Net()

conf.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@
7676
'sphinx_copybutton',
7777
'sphinx_gallery.gen_gallery',
7878
'sphinx_design',
79-
'nbsphinx'
8079
]
8180

8281
intersphinx_mapping = {

intermediate_source/FSDP_tutorial.rst

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Training AI models at a large scale is a challenging task that requires a lot of
1010
It also comes with considerable engineering complexity to handle the training of these very large models.
1111
`PyTorch FSDP <https://pytorch.org/blog/introducing-pytorch-fully-sharded-data-parallel-api/>`__, released in PyTorch 1.11, makes this easier.
1212

13-
In this tutorial, we show how to use `FSDP APIs <https://pytorch.org/docs/1.11/fsdp.html>`__, for simple MNIST models that can be extended to other larger models such as `HuggingFace BERT models <https://huggingface.co/blog/zero-deepspeed-fairscale>`__,
13+
In this tutorial, we show how to use `FSDP APIs <https://pytorch.org/docs/stable/fsdp.html>`__ for simple MNIST models that can be extended to other larger models such as `HuggingFace BERT models <https://huggingface.co/blog/zero-deepspeed-fairscale>`__,
1414
`GPT 3 models up to 1T parameters <https://pytorch.medium.com/training-a-1-trillion-parameter-model-with-pytorch-fully-sharded-data-parallel-on-aws-3ac13aa96cff>`__. The sample DDP MNIST code has been borrowed from `here <https://github.com/yqhu/mnist_examples>`__.
1515

1616

@@ -63,9 +63,7 @@ Here we use a toy model to run training on the MNIST dataset for demonstration p
6363

6464
1.1 Install PyTorch along with Torchvision
6565

66-
.. code-block:: bash
67-
68-
pip3 install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu113/torch_nightly.html
66+
See the `Get Started guide <https://pytorch.org/get-started/locally/>`__ for information on installation.
6967

7068
We add the following code snippets to a python script “FSDP_mnist.py”.
7169

0 commit comments

Comments
 (0)