Skip to content

Parse dependencies from all args if depends_on is not used. #384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jul 5, 2023
2 changes: 2 additions & 0 deletions docs/source/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
## 0.4.0 - 2023-xx-xx

- {pull}`323` remove Python 3.7 support and use a new GitHub action to provide mamba.
- {pull}`384` allows parsing dependencies from every function argument if `depends_on`
is not present.
- {pull}`387` replaces pony with sqlalchemy.
- {pull}`391` removes `@pytask.mark.parametrize`.

Expand Down
26 changes: 13 additions & 13 deletions docs/source/tutorials/defining_dependencies_products.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ To ensure pytask executes all tasks in the correct order, define which dependenc
required and which products are produced by a task.

:::{important}
If you do not specify dependencies and products as explained below, pytask will not be able
to build a graph, a {term}`DAG`, and will not be able to execute all tasks in the
If you do not specify dependencies and products as explained below, pytask will not be
able to build a graph, a {term}`DAG`, and will not be able to execute all tasks in the
project correctly!
:::

Expand All @@ -19,15 +19,16 @@ def task_create_random_data(produces):
...
```

The {func}`@pytask.mark.produces <pytask.mark.produces>` marker attaches a
product to a task which is a {class}`pathlib.Path` to file. After the task has finished,
pytask will check whether the file exists.
The {func}`@pytask.mark.produces <pytask.mark.produces>` marker attaches a product to a
task. The product is a {class}`pathlib.Path` to a file. After the task has finished,
pytask will check whether the file exists.

Optionally, you can use `produces` as an argument of the task function and get access to
the same path inside the task function.
Add `produces` as an argument of the task function to get access to the same path inside
the task function.

:::{tip}
If you do not know about {mod}`pathlib` check out [^id3] and [^id4]. The module is beneficial for handling paths conveniently and across platforms.
If you do not know about {mod}`pathlib`, check out [^id3] and [^id4]. The module is
beneficial for handling paths conveniently and across platforms.
:::

## Dependencies
Expand All @@ -44,7 +45,7 @@ def task_plot_data(depends_on, produces):
...
```

Use `depends_on` as a function argument to work with the dependency path and, for
Add `depends_on` as a function argument to work with the path of the dependency and, for
example, load the data.

## Conversion
Expand All @@ -61,9 +62,6 @@ def task_create_random_data(produces):
...
```

If you use `depends_on` or `produces` as arguments for the task function, you will have
access to the paths of the targets as {class}`pathlib.Path`.

## Multiple dependencies and products

The easiest way to attach multiple dependencies or products to a task is to pass a
Expand Down Expand Up @@ -108,7 +106,9 @@ Why does pytask recommend dictionaries and convert lists, tuples, or other
iterators to dictionaries? First, dictionaries with positions as keys behave very
similarly to lists.

Secondly, dictionaries use keys instead of positions that are more verbose and descriptive and do not assume a fixed ordering. Both attributes are especially desirable in complex projects.
Secondly, dictionaries use keys instead of positions. Keys are more verbose and
descriptive than positions and do not assume a fixed ordering. Both attributes are
especially desirable in complex projects.

## Multiple decorators

Expand Down
25 changes: 17 additions & 8 deletions src/_pytask/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
from typing import Iterable

from _pytask.collect_utils import depends_on
from _pytask.collect_utils import parse_dependencies_from_task_function
from _pytask.collect_utils import parse_nodes
from _pytask.collect_utils import parse_products_from_task_function
from _pytask.collect_utils import produces
from _pytask.config import hookimpl
from _pytask.config import IS_FILE_SYSTEM_CASE_SENSITIVE
Expand All @@ -22,6 +24,7 @@
from _pytask.exceptions import CollectionError
from _pytask.mark_utils import has_mark
from _pytask.nodes import FilePathNode
from _pytask.nodes import PythonNode
from _pytask.nodes import Task
from _pytask.outcomes import CollectionOutcome
from _pytask.outcomes import count_outcomes
Expand Down Expand Up @@ -167,11 +170,20 @@ def pytask_collect_task(

"""
if (name.startswith("task_") or has_mark(obj, "task")) and callable(obj):
dependencies = parse_nodes(session, path, name, obj, depends_on)
products = parse_nodes(session, path, name, obj, produces)
if has_mark(obj, "depends_on"):
nodes = parse_nodes(session, path, name, obj, depends_on)
dependencies = {"depends_on": nodes}
else:
dependencies = parse_dependencies_from_task_function(
session, path, name, obj
)

if has_mark(obj, "produces"):
products = parse_nodes(session, path, name, obj, produces)
else:
products = parse_products_from_task_function(session, path, name, obj)

markers = obj.pytask_meta.markers if hasattr(obj, "pytask_meta") else []
kwargs = obj.pytask_meta.kwargs if hasattr(obj, "pytask_meta") else {}

# Get the underlying function to avoid having different states of the function,
# e.g. due to pytask_meta, in different layers of the wrapping.
Expand All @@ -184,7 +196,6 @@ def pytask_collect_task(
depends_on=dependencies,
produces=products,
markers=markers,
kwargs=kwargs,
)
return None

Expand All @@ -205,7 +216,7 @@ def pytask_collect_task(
@hookimpl(trylast=True)
def pytask_collect_node(
session: Session, path: Path, node: str | Path
) -> FilePathNode | None:
) -> FilePathNode | PythonNode:
"""Collect a node of a task as a :class:`pytask.nodes.FilePathNode`.

Strings are assumed to be paths. This might be a strict assumption, but since this
Expand All @@ -226,8 +237,6 @@ def pytask_collect_node(
handled by this function.

"""
if isinstance(node, str):
node = Path(node)
if isinstance(node, Path):
if not node.is_absolute():
node = path.parent.joinpath(node)
Expand All @@ -246,7 +255,7 @@ def pytask_collect_node(
raise ValueError(_TEMPLATE_ERROR.format(node, case_sensitive_path))

return FilePathNode.from_path(node)
return None
return PythonNode(value=node)


def _not_ignored_paths(
Expand Down
25 changes: 17 additions & 8 deletions src/_pytask/collect_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from _pytask.exceptions import ResolvingDependenciesError
from _pytask.mark import select_by_keyword
from _pytask.mark import select_by_mark
from _pytask.nodes import FilePathNode
from _pytask.outcomes import ExitCode
from _pytask.path import find_common_ancestor
from _pytask.path import relative_to
Expand Down Expand Up @@ -123,7 +124,11 @@ def _find_common_ancestor_of_all_nodes(
for task in tasks:
all_paths.append(task.path)
if show_nodes:
all_paths.extend(x.path for x in tree_just_flatten(task.depends_on))
all_paths.extend(
x.path
for x in tree_just_flatten(task.depends_on)
if isinstance(x, FilePathNode)
)
all_paths.extend(x.path for x in tree_just_flatten(task.produces))

common_ancestor = find_common_ancestor(*all_paths, *paths)
Expand Down Expand Up @@ -160,14 +165,14 @@ def _print_collected_tasks(

Parameters
----------
dictionary : Dict[Path, List["Task"]]
dictionary
A dictionary with path on the first level, tasks on the second, dependencies and
products on the third.
show_nodes : bool
show_nodes
Indicator for whether dependencies and products should be displayed.
editor_url_scheme : str
editor_url_scheme
The scheme to create an url.
common_ancestor : Path
common_ancestor
The path common to all tasks and nodes.

"""
Expand Down Expand Up @@ -197,9 +202,13 @@ def _print_collected_tasks(
)

if show_nodes:
for node in sorted(
tree_just_flatten(task.depends_on), key=lambda x: x.path
):
file_path_nodes = [
i
for i in tree_just_flatten(task.depends_on)
if isinstance(i, FilePathNode)
]
sorted_nodes = sorted(file_path_nodes, key=lambda x: x.path)
for node in sorted_nodes:
reduced_node_name = relative_to(node.path, common_ancestor)
url_style = create_url_style_for_path(node.path, editor_url_scheme)
task_branch.add(
Expand Down
Loading