
Commit 704f0a1

Make prefix logic configurable and polish docs
1 parent b80a8b6 commit 704f0a1

5 files changed: +81, -13 lines changed

README.md (+23, -2)
@@ -99,14 +99,35 @@ This project explores two kinds of input for commit message completion task: dif
 2. Choose one of available model configs or add your own.
 3. Note that you have to define missing parameters from [`InputConfig`](conf/data/input_config.py). You can do it via CLI or just rewrite them. Below is the example how to define parameters via CLI.

-To launch training of model defined as `XXXModelConfig` and registered via `ConfigStore.store(name="XXX", group="model", node=XXXModelConfig)`, run the following command:
+To launch training of a model defined as `XXXModelConfig` and registered via `ConfigStore.store(name="XXX", group="model", node=XXXModelConfig)`, run the following command (with actual values instead of the X's):
 ```
 python train.py +model=XXX ++input.train_with_history=X ++input.encoder_input_type=X
 ```

 #### Additional steps for RACE model

-> :construction: Experiments with RACE model require slightly different procedure. It will be described in this section.
+Experiments with the RACE model require a slightly different procedure.
+
+1. Fine-tune the CodeT5 model. Refer to the instructions above for details.
+
+2. Use the encoder from the fine-tuned CodeT5 checkpoint to perform retrieval.
+
+   Define the configuration in [`conf/retrieval_config.py`](conf/retrieval_config.py). You have to either provide a local path to a checkpoint in `ckpt_path` or use a W&B artifact.
+   In the latter case, the artifact name will be inferred from the model configuration.
+
+   An example with a local path:
+   ```
+   python retrieve.py ++ckpt_path=<local_path>
+   ```
+   An example with a W&B artifact:
+   ```
+   python retrieve.py +model=codet5 ++input.train_with_history=X ++input.encoder_input_type=X
+   ```
+3. Initialize RACE with the fine-tuned CodeT5 weights and use the retrieved examples to train the model.
+   Refer to the instructions above for details.
+
+> :construction: Currently, downloading retrieved predictions and the fine-tuned CodeT5 checkpoint is only possible with W&B.

 ### Step 4: Evaluate

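For readers adding their own model config, a minimal sketch of what the `ConfigStore.store(...)` registration mentioned above might look like; the config class, its fields, and the chosen name below are illustrative stand-ins, not configs that actually exist in this repo:

```python
from dataclasses import dataclass

from hydra.core.config_store import ConfigStore


@dataclass
class MyModelConfig:  # hypothetical stand-in for `XXXModelConfig`
    name_or_path: str = "Salesforce/codet5-base"  # Hugging Face checkpoint to fine-tune
    learning_rate: float = 2e-5


# Register the config under the `model` group so it can be selected with `+model=my_model`.
cs = ConfigStore.instance()
cs.store(name="my_model", group="model", node=MyModelConfig)
```

With such a registration in place, training would be launched as `python train.py +model=my_model ++input.train_with_history=true ++input.encoder_input_type=diff`; the `input.*` values here are only examples of how the X's get filled in, the valid values are defined by the repo's `InputConfig`.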

compute_metrics.py (+41, -10)
@@ -18,14 +18,18 @@
 random.seed(42)


-def load_predictions(run: wandb.wandb_sdk.wandb_run.Run, cfg: MetricsConfig) -> str:
-    input_artifact = run.use_artifact(
+def load_predictions(cfg: MetricsConfig) -> str:
+    """Load predictions from W&B artifact.
+
+    Args:
+        cfg: Config; all information about artifact should be provided in corresponding fields there.
+
+    Returns:
+        Local path to downloaded predictions.
+    """
+    input_artifact = wandb.use_artifact(
         f"{cfg.logger.artifact_config.project}/{cfg.logger.artifact_config.name}:{cfg.logger.artifact_config.version}"
     )
-    if "tags" in input_artifact.metadata:
-        run.tags = ["new_prefix_logic"] + (
-            ["only_filtered" if cfg.filter.fit_filters else "only_unfiltered"] if cfg.filter.use_filtering else []
-        )

     input_artifact.get_path(cfg.logger.artifact_config.artifact_path).download(
         root=hydra.utils.to_absolute_path(
@@ -43,8 +47,24 @@ def load_predictions(run: wandb.wandb_sdk.wandb_run.Run, cfg: MetricsConfig) ->


 def add_single_example(
-    line: Dict[str, str], full_metrics: EvaluationMetrics, prefix_metrics: Dict[int, EvaluationMetrics]
+    line: Dict[str, str],
+    full_metrics: EvaluationMetrics,
+    prefix_metrics: Dict[int, EvaluationMetrics],
+    include_short: bool,
 ) -> None:
+    """Adds a single example to metrics.
+
+    * Compute the usual metrics between full prediction and full target.
+    * Compute the metrics between all prefixes of prediction and target,
+      `prefix_metrics` keys are used to determine the numbers of tokens in prefixes.
+
+    Args:
+        line: Current example, expected to include keys `Prediction` and `Target`.
+        full_metrics: A class for calculating metrics between full prediction and full target.
+        prefix_metrics: A dictionary where key `i` corresponds to metrics for prefixes of `i` tokens.
+        include_short: False to only consider messages with >= i tokens when computing metrics for prefixes of i tokens,
+          True to include all messages.
+    """
     prediction = line["Prediction"].strip()
     target = line["Target"].strip()

@@ -60,6 +80,8 @@ def add_single_example(
     target_tokens = target.split()

     for i in prefix_metrics:
+        if not include_short and len(target_tokens) < i:
+            break
         pred_prefix_i = " ".join(pred_tokens[:i])
         target_prefix_i = " ".join(target_tokens[:i])
         prefix_metrics[i].add_batch(predictions=[pred_prefix_i], references=[target_prefix_i])
@@ -80,8 +102,10 @@ def main(cfg: MetricsConfig):
             name=cfg.logger.artifact_config.name,
             config=OmegaConf.to_container(cfg, resolve=True),  # type: ignore[arg-type]
             job_type="metrics" if not cfg.filter.use_filtering else "filter_metrics",
+            tags=(["new_prefix_logic"] if cfg.include_short else [])
+            + (["only_filtered" if cfg.filter.fit_filters else "only_unfiltered"] if cfg.filter.use_filtering else []),
         )  # type: ignore[assignment]
-        cfg.preds_path = load_predictions(run=run, cfg=cfg)
+        cfg.preds_path = load_predictions(cfg)
     elif cfg.preds_path:
         cfg.preds_path = to_absolute_path(cfg.preds_path)
     else:
@@ -102,7 +126,9 @@ def main(cfg: MetricsConfig):
     if not cfg.filter.use_filtering:
         with jsonlines.open(cfg.preds_path, "r") as reader:
             for line in tqdm(reader, desc="Computing metrics"):
-                add_single_example(line, full_metrics=full_metrics, prefix_metrics=prefix_metrics)
+                add_single_example(
+                    line, full_metrics=full_metrics, prefix_metrics=prefix_metrics, include_short=cfg.include_short
+                )

     # or define filters configuration to control what subset will be considered
     else:
@@ -156,7 +182,12 @@ def include_example(filters_line: Dict[str, str]) -> bool:
                 and i in subset_ids
                 and include_example(filters_line)
             ):
-                add_single_example(input_line, full_metrics=full_metrics, prefix_metrics=prefix_metrics)
+                add_single_example(
+                    input_line,
+                    full_metrics=full_metrics,
+                    prefix_metrics=prefix_metrics,
+                    include_short=cfg.include_short,
+                )

     # -----------------------
     # -  compute results    -
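To illustrate the behavior that the new `include_short` flag toggles, here is a small self-contained sketch of the prefix loop; it uses plain token lists and a dictionary of prefix pairs instead of the repo's `EvaluationMetrics` class, which is not shown in this diff:

```python
from typing import Dict, List, Tuple


def collect_prefix_pairs(
    prediction: str, target: str, prefix_lengths: List[int], include_short: bool
) -> Dict[int, Tuple[str, str]]:
    """Return the (prediction prefix, target prefix) pair that would be scored for each prefix length."""
    pred_tokens = prediction.split()
    target_tokens = target.split()
    pairs = {}
    for i in prefix_lengths:
        # With include_short=False, a target shorter than i tokens is skipped for prefix length i
        # (and, since lengths are increasing, for all longer prefixes as well).
        if not include_short and len(target_tokens) < i:
            break
        pairs[i] = (" ".join(pred_tokens[:i]), " ".join(target_tokens[:i]))
    return pairs


# Example: a 3-token target contributes to prefix lengths 1-3 only when include_short=False,
# but to every requested length when include_short=True.
print(collect_prefix_pairs("fix typo in readme", "fix typo everywhere", [1, 2, 3, 5], include_short=False))
print(collect_prefix_pairs("fix typo in readme", "fix typo everywhere", [1, 2, 3, 5], include_short=True))
```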

conf/metrics_config.py (+3)
@@ -83,10 +83,13 @@ class MetricsConfig:

     Attributes:
         preds_path: Local path to model predictions. Instead of this, you can also define configuration for loading artifact at WandbMetricConfig.
+        include_short: False to only consider messages with >= i tokens when computing metrics for prefixes of i tokens,
+          True to include all messages.
         max_n_tokens: Maximum number of tokens (for prefix-level metrics).
     """

     preds_path: Optional[str] = None
+    include_short: bool = False
     max_n_tokens: int = 15
     filter: FilterConfig = field(default_factory=FilterConfig)
     logger: WandbMetricConfig = field(default_factory=WandbMetricConfig)
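A quick sketch of how the new field behaves under Hydra/OmegaConf overrides; the stripped-down dataclass below only mirrors the two scalar fields and is not the repo's full `MetricsConfig`:

```python
from dataclasses import dataclass

from omegaconf import OmegaConf


@dataclass
class MetricsConfigSketch:
    include_short: bool = False  # flag introduced in this commit
    max_n_tokens: int = 15


# A CLI override such as `++include_short=true` boils down to merging a dotlist on top of the defaults.
cfg = OmegaConf.merge(OmegaConf.structured(MetricsConfigSketch), OmegaConf.from_dotlist(["include_short=true"]))
print(cfg.include_short)  # True
```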

retrieve.py (+5, -1)
@@ -42,7 +42,11 @@ def download_artifact(cfg: RetrievalConfig, run: wandb.wandb_sdk.wandb_run.Run,


 def export_model_checkpoint(cfg: RetrievalConfig) -> str:
-    """Helper function to export model weights in Transformers format from Lightning checkpoint."""
+    """Helper function to export model weights in a Transformers format from Lightning checkpoint.
+
+    Returns:
+        A local path to directory with checkpoint in a Transformers format.
+    """
     logging.info(f"Checkpoint path: {cfg.ckpt_path}")

     module = CMCModule.load_from_checkpoint(
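For context, exporting weights "in a Transformers format" from a Lightning checkpoint typically amounts to something like the sketch below; the `.model` and `.tokenizer` attributes on the Lightning module are assumptions made for illustration, not necessarily how `CMCModule` stores its underlying objects:

```python
import os

from pytorch_lightning import LightningModule


def export_to_transformers_dir(module: LightningModule, output_dir: str) -> str:
    """Save the wrapped Hugging Face model (and, if present, its tokenizer) as a reloadable directory."""
    os.makedirs(output_dir, exist_ok=True)
    module.model.save_pretrained(output_dir)  # assumes the module keeps a `transformers` model in `.model`
    if hasattr(module, "tokenizer"):
        module.tokenizer.save_pretrained(output_dir)
    return output_dir


# Usage sketch: restore the Lightning checkpoint first, then export.
# module = CMCModule.load_from_checkpoint("path/to/checkpoint.ckpt")
# export_to_transformers_dir(module, "exported_codet5")
```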

train.py (+9)
@@ -21,6 +21,15 @@


 def get_world_size(accelerator: str, devices: Any) -> int:
+    """Determines world size for all possible ways of defining number of devices in Lightning.
+
+    Args:
+        accelerator: Argument for `pytorch_lightning.trainer`, corresponds to a device type.
+        devices: Argument for `pytorch_lightning.trainer`, corresponds to a number of devices/specific devices to use.
+
+    Returns:
+        World size.
+    """
     if accelerator == "cpu":
         return 1
     elif accelerator == "gpu":
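The body of `get_world_size` is only partially visible in this hunk. As a rough guide, resolving Lightning's `devices` argument to a world size usually looks something like the simplified sketch below; this is an assumption for illustration, not the repo's exact logic:

```python
from typing import Any

import torch


def get_world_size_sketch(accelerator: str, devices: Any) -> int:
    if accelerator == "cpu":
        return 1
    if accelerator == "gpu":
        if devices in ("auto", -1, "-1"):
            return torch.cuda.device_count()  # use every visible GPU
        if isinstance(devices, int):
            return devices  # e.g. devices=4 -> 4 processes
        if isinstance(devices, (list, tuple)):
            return len(devices)  # e.g. devices=[0, 1] -> 2 processes
        if isinstance(devices, str):
            # e.g. "2" -> 2 processes, "0,1" -> the two listed GPUs
            return int(devices) if devices.isdigit() else len([d for d in devices.split(",") if d.strip()])
    raise ValueError(f"Unsupported accelerator/devices combination: {accelerator}, {devices}")
```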
