* **loss**: The loss per element in the batch. Reduction type is ```"element"```.
## CrossBatchMemory
This wraps a loss function, and implements [Cross-Batch Memory for Embedding Learning](https://arxiv.org/pdf/1912.06798.pdf){target=_blank}. It stores embeddings from previous iterations in a queue, and uses them to form more pairs/triplets with the current iteration's embeddings. A usage sketch follows the parameter list below.
* **loss**: The loss function to be wrapped. For example, you could pass in ```ContrastiveLoss()```.
* **embedding_size**: The size of the embeddings that you pass into the loss function. For example, if your batch size is 128 and your network outputs 512-dimensional embeddings, then set ```embedding_size``` to 512.
* **memory_size**: The size of the memory queue.
* **miner**: An optional [tuple miner](miners.md), which will be used to mine pairs/triplets from the memory queue.
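Below is a minimal usage sketch. The constructor arguments follow the parameter list above; the choice of ```TripletMarginLoss```, ```MultiSimilarityMiner```, the 512-dimensional embeddings, and the random batch are illustrative assumptions, not requirements.

```python
import torch

from pytorch_metric_learning import losses, miners

# Wrap an ordinary tuple-based loss so it can also form pairs/triplets
# with embeddings stored in the memory queue.
inner_loss = losses.TripletMarginLoss()
miner = miners.MultiSimilarityMiner()  # optional

loss_fn = losses.CrossBatchMemory(
    loss=inner_loss,
    embedding_size=512,   # must match your model's output dimension
    memory_size=1024,     # number of embeddings kept in the queue
    miner=miner,
)

# Inside the training loop (random tensors stand in for model outputs):
embeddings = torch.randn(128, 512)
labels = torch.randint(0, 10, (128,))
loss = loss_fn(embeddings, labels)
```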
In addition to ```embeddings```, ```labels```, and an optional ```indices_tuple```, CrossBatchMemory's ```forward``` function accepts a 4th argument:
* **enqueue_mask**: A boolean tensor where `enqueue_mask[i]` is True if `embeddings[i]` should be added to the memory queue. This enables CrossBatchMemory to be used in self-supervision frameworks like [MoCo](https://arxiv.org/pdf/1911.05722.pdf). Check out the [MoCo on CIFAR100](https://github.com/KevinMusgrave/pytorch-metric-learning/tree/master/examples#simple-examples) notebook to see how this works.
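Here is a condensed sketch of that MoCo-style pattern, assuming ```NTXentLoss``` as the wrapped loss and random tensors in place of the query/key encoder outputs; see the notebook linked above for the full recipe.

```python
import torch

from pytorch_metric_learning import losses

loss_fn = losses.CrossBatchMemory(
    losses.NTXentLoss(), embedding_size=128, memory_size=4096
)

# Placeholders: query_emb from the online encoder, key_emb from the momentum encoder.
query_emb = torch.randn(64, 128)
key_emb = torch.randn(64, 128)
all_emb = torch.cat([query_emb, key_emb], dim=0)

# Query i and key i share a label, so they form a positive pair.
# In a real loop, offset the labels each iteration so they don't
# collide with labels already sitting in the memory queue.
labels = torch.arange(64)
all_labels = torch.cat([labels, labels], dim=0)

# Enqueue only the momentum-encoder outputs, as in MoCo.
enqueue_mask = torch.zeros(128, dtype=torch.bool)
enqueue_mask[64:] = True

loss = loss_fn(all_emb, all_labels, enqueue_mask=enqueue_mask)
```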
* **pos_eps**: The epsilon in the L<sub>pos</sub> equation. The paper uses 0.01.
* **neg_eps**: The epsilon in the L<sub>neg</sub> equation. The paper uses 0.01.
You should probably use this in conjunction with another loss, as described in the paper. You can accomplish this by using [MultipleLosses](losses.md#multiplelosses).
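For example, a sketch along these lines, where ```TupletMarginLoss``` is used as the main loss (as in the paper) and the ```[1, 0.5]``` weights are an illustrative assumption:

```python
from pytorch_metric_learning import losses

main_loss = losses.TupletMarginLoss()
var_loss = losses.IntraPairVarianceLoss()
complete_loss = losses.MultipleLosses([main_loss, var_loss], weights=[1, 0.5])

# Then, in the training loop:
# loss = complete_loss(embeddings, labels)
```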
* **loss**: The loss per element in the batch. Reduction type is ```"element"```.
## MultipleLosses
This is a simple wrapper for multiple losses. Pass in a list or dictionary of already-initialized loss functions. Then, when you call forward on this object, it will return the sum of all wrapped losses.

* **losses**: A list or dictionary of initialized loss functions. On the forward call of MultipleLosses, each wrapped loss will be computed, and their weighted sum will be returned.
* **miners**: Optional. A list or dictionary of mining functions. This allows you to pair mining functions with loss functions. For example, if ```losses = [loss_A, loss_B]``` and ```miners = [None, miner_B]```, then no mining will be done for ```loss_A```, but the output of ```miner_B``` will be passed to ```loss_B```. The same logic applies if ```losses = {"loss_A": loss_A, "loss_B": loss_B}``` and ```miners = {"loss_B": miner_B}```.
* **weights**: Optional. A list or dictionary of weights; each weight is multiplied by the loss returned by the corresponding loss function. The default is to multiply each loss by 1. If ```losses``` is a list, then ```weights``` must be a list. If ```losses``` is a dictionary, then ```weights``` must contain the same keys as ```losses```.
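A sketch of both forms, with the particular losses, miner, and weights chosen purely for illustration:

```python
from pytorch_metric_learning import losses, miners

loss_A = losses.ContrastiveLoss()
loss_B = losses.TripletMarginLoss()
miner_B = miners.MultiSimilarityMiner()

# List form: entries line up by position across losses, miners, and weights.
loss_fn = losses.MultipleLosses(
    losses=[loss_A, loss_B],
    miners=[None, miner_B],  # no mining for loss_A
    weights=[1, 0.5],
)

# Dictionary form: entries line up by key instead.
loss_fn = losses.MultipleLosses(
    losses={"loss_A": loss_A, "loss_B": loss_B},
    miners={"loss_B": miner_B},            # miners may cover a subset of keys
    weights={"loss_A": 1, "loss_B": 0.5},  # weights must cover every key
)

# In the training loop:
# loss = loss_fn(embeddings, labels)
```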
* **loss**: The loss per element in the batch that results in a nonzero exponent in the cross-entropy expression. Reduction type is ```"element"```.
## SelfSupervisedLoss
A common use case is to have `embeddings` and `ref_emb` be augmented versions of each other. For most losses, you currently have to create labels to indicate which `embeddings` correspond with which `ref_emb`. `SelfSupervisedLoss` automates this.
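A minimal sketch, assuming ```TripletMarginLoss``` as the wrapped loss and random tensors standing in for the two augmented views:

```python
import torch

from pytorch_metric_learning import losses

loss_fn = losses.SelfSupervisedLoss(losses.TripletMarginLoss())

# Two augmented views of the same batch: row i of embeddings and
# row i of ref_emb are treated as a positive pair.
embeddings = torch.randn(32, 128)  # e.g. model(augmentation_1(batch))
ref_emb = torch.randn(32, 128)     # e.g. model(augmentation_2(batch))

loss = loss_fn(embeddings, ref_emb)
```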
[Signal-to-Noise Ratio: A Robust Distance Metric for Deep Metric Learning](http://openaccess.thecvf.com/content_CVPR_2019/papers/Yuan_Signal-To-Noise_Ratio_A_Robust_Distance_Metric_for_Deep_Metric_Learning_CVPR_2019_paper.pdf){target=_blank}
* **margin**: The angular margin (in degrees) applied to positive pairs. This is beta in the above equation. The paper uses a value of 5.73 degrees (0.1 radians).
* **scale**: This is ```s``` in the above equation.
The paper combines this loss with [IntraPairVarianceLoss](losses.md#intrapairvarianceloss). You can accomplish this by using [MultipleLosses](losses.md#multiplelosses).
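A short sketch of that combination, mirroring the one in the [IntraPairVarianceLoss](losses.md#intrapairvarianceloss) section; the weights are an illustrative assumption:

```python
from pytorch_metric_learning import losses

main_loss = losses.TupletMarginLoss()
var_loss = losses.IntraPairVarianceLoss()
complete_loss = losses.MultipleLosses([main_loss, var_loss], weights=[1, 0.5])
```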