|
53 | 53 | import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
|
54 | 54 | import org.elasticsearch.cluster.service.ClusterService;
|
55 | 55 | import org.elasticsearch.common.Nullable;
|
| 56 | +import org.elasticsearch.common.Randomness; |
56 | 57 | import org.elasticsearch.common.Strings;
|
57 | 58 | import org.elasticsearch.common.breaker.CircuitBreaker;
|
58 | 59 | import org.elasticsearch.common.component.LifecycleListener;
|
@@ -1729,50 +1730,29 @@ private void removeExclusions(Set<String> excludedNodeIds) {
|
1729 | 1730 | public synchronized void fullRestart(RestartCallback callback) throws Exception {
|
1730 | 1731 | int numNodesRestarted = 0;
|
1731 | 1732 | final Settings[] newNodeSettings = new Settings[nextNodeId.get()];
|
1732 |
| - Map<Set<DiscoveryNodeRole>, List<NodeAndClient>> nodesByRoles = new HashMap<>(); |
1733 |
| - Set[] rolesOrderedByOriginalStartupOrder = new Set[nextNodeId.get()]; |
1734 |
| - final int nodeCount = nodes.size(); |
| 1733 | + final List<NodeAndClient> toStartAndPublish = new ArrayList<>(); // we want to start nodes in one go |
1735 | 1734 | for (NodeAndClient nodeAndClient : nodes.values()) {
|
1736 | 1735 | callback.doAfterNodes(numNodesRestarted++, nodeAndClient.nodeClient());
|
1737 | 1736 | logger.info("Stopping and resetting node [{}] ", nodeAndClient.name);
|
1738 | 1737 | if (activeDisruptionScheme != null) {
|
1739 | 1738 | activeDisruptionScheme.removeFromNode(nodeAndClient.name, this);
|
1740 | 1739 | }
|
1741 |
| - DiscoveryNode discoveryNode = getInstanceFromNode(ClusterService.class, nodeAndClient.node()).localNode(); |
1742 | 1740 | final Settings newSettings = nodeAndClient.closeForRestart(callback);
|
1743 | 1741 | newNodeSettings[nodeAndClient.nodeAndClientId()] = newSettings;
|
1744 |
| - rolesOrderedByOriginalStartupOrder[nodeAndClient.nodeAndClientId()] = discoveryNode.getRoles(); |
1745 |
| - nodesByRoles.computeIfAbsent(discoveryNode.getRoles(), k -> new ArrayList<>()).add(nodeAndClient); |
| 1742 | + toStartAndPublish.add(nodeAndClient); |
1746 | 1743 | }
|
1747 | 1744 |
|
1748 | 1745 | callback.onAllNodesStopped();
|
1749 | 1746 |
|
1750 |
| - assert nodesByRoles.values().stream().mapToInt(List::size).sum() == nodeCount; |
| 1747 | + // randomize start up order |
| 1748 | + Randomness.shuffle(toStartAndPublish); |
1751 | 1749 |
|
1752 |
| - // randomize start up order, but making sure that: |
1753 |
| - // 1) A data folder that was assigned to a data node will stay so |
1754 |
| - // 2) Data nodes will get the same node lock ordinal range, so custom index paths (where the ordinal is used) |
1755 |
| - // will still belong to data nodes |
1756 |
| - for (List<NodeAndClient> sameRoleNodes : nodesByRoles.values()) { |
1757 |
| - Collections.shuffle(sameRoleNodes, random); |
1758 |
| - } |
1759 |
| - final List<NodeAndClient> startUpOrder = new ArrayList<>(); |
1760 |
| - for (Set roles : rolesOrderedByOriginalStartupOrder) { |
1761 |
| - if (roles == null) { |
1762 |
| - // if some nodes were stopped, we want have a role for that ordinal |
1763 |
| - continue; |
1764 |
| - } |
1765 |
| - final List<NodeAndClient> nodesByRole = nodesByRoles.get(roles); |
1766 |
| - startUpOrder.add(nodesByRole.remove(0)); |
| 1750 | + for (NodeAndClient nodeAndClient : toStartAndPublish) { |
| 1751 | + logger.info("recreating node [{}] ", nodeAndClient.name); |
| 1752 | + nodeAndClient.recreateNode(newNodeSettings[nodeAndClient.nodeAndClientId()], () -> rebuildUnicastHostFiles(toStartAndPublish)); |
1767 | 1753 | }
|
1768 |
| - assert nodesByRoles.values().stream().mapToInt(List::size).sum() == 0; |
1769 | 1754 |
|
1770 |
| - for (NodeAndClient nodeAndClient : startUpOrder) { |
1771 |
| - logger.info("creating node [{}] ", nodeAndClient.name); |
1772 |
| - nodeAndClient.recreateNode(newNodeSettings[nodeAndClient.nodeAndClientId()], () -> rebuildUnicastHostFiles(startUpOrder)); |
1773 |
| - } |
1774 |
| - |
1775 |
| - startAndPublishNodesAndClients(startUpOrder); |
| 1755 | + startAndPublishNodesAndClients(toStartAndPublish); |
1776 | 1756 |
|
1777 | 1757 | if (callback.validateClusterForming()) {
|
1778 | 1758 | validateClusterFormed();
|
|
0 commit comments