Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
public class RegionMaintainHandler {

private static final Logger LOGGER = LoggerFactory.getLogger(RegionMaintainHandler.class);
private static final int DELETE_OLD_REGION_PEER_RPC_RETRY_NUM = 3;

private static final ConfigNodeConfig CONF = ConfigNodeDescriptor.getInstance().getConf();

Expand Down Expand Up @@ -175,7 +176,10 @@ public TSStatus createNewRegionPeer(TConsensusGroupId regionId, TDataNodeLocatio
|| IOT_CONSENSUS_V2.equals(CONF.getDataRegionConsensusProtocolClass()))) {
// parameter of createPeer for MultiLeader should be all peers
currentPeerNodes = new ArrayList<>(regionReplicaNodes);
currentPeerNodes.add(destDataNode);
if (currentPeerNodes.stream()
.noneMatch(node -> node.getDataNodeId() == destDataNode.getDataNodeId())) {
currentPeerNodes.add(destDataNode);
}
} else {
// parameter of createPeer for Ratis can be empty
currentPeerNodes = Collections.emptyList();
Expand Down Expand Up @@ -299,15 +303,17 @@ public TSStatus submitDeleteOldRegionPeerTask(
TMaintainPeerReq maintainPeerReq =
new TMaintainPeerReq(regionId, originalDataNode, procedureId);

// Always use full retries regardless of node status, because after a cluster crash the
// target DataNode may be Unknown but still in the process of restarting.
// RemoveRegionPeerProcedure already retries DELETE_OLD_REGION_PEER at the procedure level.
// Keep each RPC attempt bounded so a permanently down original DataNode does not block the
// procedure for minutes, while still tolerating a briefly restarting node after cluster crash.
status =
(TSStatus)
SyncDataNodeClientPool.getInstance()
.sendSyncRequestToDataNodeWithRetry(
.sendSyncRequestToDataNodeWithGivenRetry(
originalDataNode.getInternalEndPoint(),
maintainPeerReq,
CnToDnSyncRequestType.DELETE_OLD_REGION_PEER);
CnToDnSyncRequestType.DELETE_OLD_REGION_PEER,
DELETE_OLD_REGION_PEER_RPC_RETRY_NUM);
LOGGER.info(
"{}, Send action deleteOldRegionPeer finished, regionId: {}, dataNodeId: {}",
REGION_MIGRATE_PROCESS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,17 @@ public void createLocalPeer(ConsensusGroupId groupId, List<Peer> peers)

String path = buildPeerDir(storageDir, groupId);
File file = new File(path);
if (!file.mkdirs()) {
if (!file.exists() && !file.mkdirs()) {
logger.warn("Unable to create consensus dir for group {} at {}", groupId, path);
return null;
}
if (!file.isDirectory()) {
logger.warn(
"Consensus dir path for group {} exists but is not a directory: {}",
groupId,
path);
return null;
}

IoTConsensusServerImpl impl =
new IoTConsensusServerImpl(
Expand Down
Loading