few improvements

jqueguiner · jqueguiner · commit bc8cad1cba68 · 2021-03-21T20:08:13.000Z
diff --git a/Dockerfile b/Dockerfile
@@ -3,7 +3,8 @@ FROM ovhcom/ai-training-pytorch
 RUN apt-get update && \
     apt install -y bash \
         build-essential \
-        libsndfile1-dev
+        libsndfile1-dev \
+        git-lfs
 
 RUN python3 -m pip install --no-cache-dir --upgrade pip && \
     python3 -m pip install --no-cache-dir \
@@ -20,10 +21,12 @@ RUN mkdir -p /workspace/wav2vec/
 
 COPY finetune.sh run_common_voice.py  finetune_with_params.sh /workspace/wav2vec/
 
-COPY home-server.html /usr/bin/home-server.html
+COPY home-server.html run_all.sh /usr/bin/
 
 RUN chown -R 42420:42420 /workspace
 
+RUN chown -R 42420:42420 /usr/bin/run_all.sh
+
 #Default training env variables
 ENV model_name_or_path="facebook/wav2vec2-large-xlsr-53" \
     dataset_config_name="fr" \
@@ -45,4 +48,5 @@ ENV model_name_or_path="facebook/wav2vec2-large-xlsr-53" \
 
 WORKDIR /workspace
 ENTRYPOINT []
+#CMD ["sh", "/usr/bin/run_all.sh"]
 CMD ["supervisord", "-n", "-u", "42420", "-c", "/etc/supervisor/supervisor.conf"]
diff --git a/generate_all_trainings.py b/generate_all_trainings.py
@@ -17,7 +17,7 @@
     next(csv_reader)
     for (language_code, language_full_name) in csv_reader:
         print(f"#Launching Training for {language_code}-{language_full_name}")
-        cmd = f"ovhai job run --gpu 1 --name '{language_code}-{language_full_name}' --volume output_models@GRA:/workspace/output_models:RW:cache -e model_name_or_path='facebook/wav2vec2-large-xlsr-53' -e dataset_config_name={language_code} -e output_dir='/workspace/output_models/wav2vec2-large-xlsr-{language_code}-{language_full_name}-demo' -e cache_dir='/workspace/data' databuzzword/hf-wav2vec -- sh /workspace/wav2vec/finetune_with_params.sh"
+        cmd = f"ovhai job run --gpu 1 --name '{language_code}-{language_full_name}' --volume output_models@GRA/{language_code}:/workspace/output_models:RW:cache -e model_name_or_path='facebook/wav2vec2-large-xlsr-53' -e dataset_config_name={language_code} -e output_dir='/workspace/output_models/wav2vec2-large-xlsr-{language_code}-{language_full_name}-demo' -e cache_dir='/workspace/data' -e num_train_epochs=10 databuzzword/hf-wav2vec -- sh /workspace/wav2vec/finetune_with_params.sh"
         print(cmd)
         stream = os.popen(cmd)
         output = stream.read()
diff --git a/run_all.sh b/run_all.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+supervisord -n -u 42420 -c /etc/supervisor/supervisor.conf

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -0,0 +1,2 @@` |
|  | `1` | `+#!/usr/bin/env bash` |
|  | `2` | `+supervisord -n -u 42420 -c /etc/supervisor/supervisor.conf` |