Skip to content

Commit bcdcb53

Browse files
committed
horizontal scale retry
1 parent 19768bc commit bcdcb53

File tree

15 files changed

+283
-69
lines changed

15 files changed

+283
-69
lines changed

cachecloud-open-web/manager/cachecloud_manager.log.2017062421

-1
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package com.sohu.cache.constant;
2+
3+
/**
 * Two-state yes/no flag carrying an integer code (NO = 0, YES = 1).
 * NOTE(review): presumably the vocabulary for the {@code isPipeline} value used by
 * slot migration — confirm against callers.
 *
 * @author leifu
 * @Date 2017-07-13
 * @Time 15:26:03
 */
public enum PipelineEnum {
    NO(0), YES(1);

    /** Integer code of this constant; assigned once in the constructor and never changed. */
    private final int value;

    private PipelineEnum(int value) {
        this.value = value;
    }

    /**
     * Looks up the constant whose integer code equals {@code pipelineInt}.
     *
     * @param pipelineInt integer code to resolve
     * @return the matching constant, or {@code null} when no constant carries that code;
     *         callers must handle the {@code null} case
     */
    public static PipelineEnum getPipelineEnum(int pipelineInt) {
        for (PipelineEnum pipelineEnum : PipelineEnum.values()) {
            if (pipelineInt == pipelineEnum.value) {
                return pipelineEnum;
            }
        }
        return null;
    }

    /**
     * @return the integer code of this constant
     */
    public int getValue() {
        return value;
    }
}

cachecloud-open-web/src/main/java/com/sohu/cache/entity/AppStats.java

+19-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
package com.sohu.cache.entity;
22

3+
import java.text.DecimalFormat;
4+
import java.text.NumberFormat;
35
import java.util.Date;
46
import java.util.List;
57

8+
import org.apache.commons.lang.math.NumberUtils;
9+
610
/**
711
* Created by yijunzhang on 14-6-9.
812
*/
@@ -106,7 +110,21 @@ public void setCollectTime(long collectTime) {
106110
/**
 * @return the hit count stored in this stats record
 */
public long getHits() {
107111
return hits;
108112
}
109-
113+
114+
/**
115+
* 命中率
116+
* @return
117+
*/
118+
public long getHitPercent() {
119+
long total = hits + misses;
120+
if (total == 0) {
121+
return 0;
122+
} else {
123+
NumberFormat formatter = new DecimalFormat("0");
124+
return NumberUtils.toLong(formatter.format(hits * 100.0 / total));
125+
}
126+
}
127+
110128
/**
 * @param hits hit count to store in this stats record
 */
public void setHits(long hits) {
111129
this.hits = hits;
112130
}

cachecloud-open-web/src/main/java/com/sohu/cache/entity/InstanceReshardProcess.java

+41-1
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,22 @@ public class InstanceReshardProcess {
3131
* 源实例id
3232
*/
3333
private int sourceInstanceId;
34-
34+
35+
36+
/**
37+
* 源实例
38+
*/
39+
private InstanceInfo sourceInstanceInfo;
40+
3541
/**
3642
* 目标实例id
3743
*/
3844
private int targetInstanceId;
45+
46+
/**
47+
* 目标实例
48+
*/
49+
private InstanceInfo targetInstanceInfo;
3950

4051
/**
4152
* 开始slot
@@ -51,6 +62,11 @@ public class InstanceReshardProcess {
5162
* 正在迁移的slot
5263
*/
5364
private int migratingSlot;
65+
66+
/**
67+
* Pipeline flag: 0 = no, 1 = yes (matches PipelineEnum NO(0) / YES(1))
68+
*/
69+
private int isPipeline;
5470

5571
/**
5672
* 已完成迁移的slot数量
@@ -174,6 +190,30 @@ public Date getStartTime() {
174190
return startTime;
175191
}
176192

193+
/**
 * @return the stored pipeline flag as an int; presumably one of the PipelineEnum codes
 *         (NO = 0, YES = 1) — TODO confirm against the code that populates it
 */
public int getIsPipeline() {
194+
return isPipeline;
195+
}
196+
197+
/**
 * @param isPipeline pipeline flag to store; presumably one of the PipelineEnum codes
 *        (NO = 0, YES = 1) — TODO confirm; no validation is done here
 */
public void setIsPipeline(int isPipeline) {
198+
this.isPipeline = isPipeline;
199+
}
200+
201+
/**
 * @return the source instance held by this reshard process record; may be null if it
 *         was never set (the field has no initializer in the visible code)
 */
public InstanceInfo getSourceInstanceInfo() {
202+
return sourceInstanceInfo;
203+
}
204+
205+
/**
 * @param sourceInstanceInfo source instance to attach to this reshard process record
 */
public void setSourceInstanceInfo(InstanceInfo sourceInstanceInfo) {
206+
this.sourceInstanceInfo = sourceInstanceInfo;
207+
}
208+
209+
/**
 * @return the target instance held by this reshard process record; may be null if it
 *         was never set (the field has no initializer in the visible code)
 */
public InstanceInfo getTargetInstanceInfo() {
210+
return targetInstanceInfo;
211+
}
212+
213+
/**
 * @param targetInstanceInfo target instance to attach to this reshard process record
 */
public void setTargetInstanceInfo(InstanceInfo targetInstanceInfo) {
214+
this.targetInstanceInfo = targetInstanceInfo;
215+
}
216+
177217
public String getStartTimeFormat() {
178218
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(dateTimeFormat);
179219
return simpleDateFormat.format(startTime);

cachecloud-open-web/src/main/java/com/sohu/cache/redis/RedisClusterReshard.java

+54-42
Original file line numberDiff line numberDiff line change
@@ -166,20 +166,66 @@ public boolean execute() {
166166
* 将source中的startSlot到endSlot迁移到target
167167
*
168168
*/
169-
public boolean migrateSlot(long appId, long appAuditId, InstanceInfo sourceInstanceInfo, InstanceInfo targetInstanceInfo, int startSlot, int endSlot, boolean isPipelineMigrate) {
169+
// public boolean migrateSlotOld(long appId, long appAuditId, InstanceInfo sourceInstanceInfo, InstanceInfo targetInstanceInfo, int startSlot, int endSlot, PipelineEnum pipelineEnum) {
170+
// long startTime = System.currentTimeMillis();
171+
// InstanceReshardProcess instanceReshardProcess = saveInstanceReshardProcess(appId, appAuditId, sourceInstanceInfo, targetInstanceInfo, startSlot, endSlot, pipelineEnum);
172+
// //源和目标Jedis
173+
// Jedis sourceJedis = redisCenter.getJedis(appId, sourceInstanceInfo.getIp(), sourceInstanceInfo.getPort(), defaultTimeout, defaultTimeout);
174+
// Jedis targetJedis = redisCenter.getJedis(appId, targetInstanceInfo.getIp(), targetInstanceInfo.getPort(), defaultTimeout, defaultTimeout);
175+
// //逐个slot迁移
176+
// boolean hasError = false;
177+
// for (int slot = startSlot; slot <= endSlot; slot++) {
178+
// long slotStartTime = System.currentTimeMillis();
179+
// try {
180+
// instanceReshardProcessDao.updateMigratingSlot(instanceReshardProcess.getId(), slot);
181+
// //num是迁移key的总数
182+
// int num = migrateSlotData(appId, sourceJedis, targetJedis, slot, pipelineEnum);
183+
// instanceReshardProcessDao.increaseFinishSlotNum(instanceReshardProcess.getId());
184+
// logger.warn("clusterReshard:{}->{}, slot={}, keys={}, costTime={} ms", sourceInstanceInfo.getHostPort(),
185+
// targetInstanceInfo.getHostPort(), slot, num, (System.currentTimeMillis() - slotStartTime));
186+
// } catch (Exception e) {
187+
// logger.error(e.getMessage(), e);
188+
// hasError = true;
189+
// break;
190+
// }
191+
// }
192+
// long endTime = System.currentTimeMillis();
193+
// logger.warn("clusterReshard:{}->{}, slot:{}->{}, costTime={} ms", sourceInstanceInfo.getHostPort(),
194+
// targetInstanceInfo.getHostPort(), startSlot, endSlot, (endTime - startTime));
195+
// if (hasError) {
196+
// instanceReshardProcessDao.updateStatus(instanceReshardProcess.getId(), ReshardStatusEnum.ERROR.getValue());
197+
// return false;
198+
// } else {
199+
// instanceReshardProcessDao.updateStatus(instanceReshardProcess.getId(), ReshardStatusEnum.FINISH.getValue());
200+
// instanceReshardProcessDao.updateEndTime(instanceReshardProcess.getId(), new Date());
201+
// return true;
202+
// }
203+
// }
204+
205+
/**
206+
* 将source中的startSlot到endSlot迁移到target
207+
*
208+
*/
209+
public boolean migrateSlot(InstanceReshardProcess instanceReshardProcess) {
210+
long appId = instanceReshardProcess.getAppId();
211+
int migratingSlot = instanceReshardProcess.getMigratingSlot();
212+
int endSlot = instanceReshardProcess.getEndSlot();
213+
int isPipeline = instanceReshardProcess.getIsPipeline();
214+
InstanceInfo sourceInstanceInfo = instanceReshardProcess.getSourceInstanceInfo();
215+
InstanceInfo targetInstanceInfo = instanceReshardProcess.getTargetInstanceInfo();
216+
170217
long startTime = System.currentTimeMillis();
171-
InstanceReshardProcess instanceReshardProcess = saveInstanceReshardProcess(appId, appAuditId, sourceInstanceInfo, targetInstanceInfo, startSlot, endSlot);
172218
//源和目标Jedis
173219
Jedis sourceJedis = redisCenter.getJedis(appId, sourceInstanceInfo.getIp(), sourceInstanceInfo.getPort(), defaultTimeout, defaultTimeout);
174220
Jedis targetJedis = redisCenter.getJedis(appId, targetInstanceInfo.getIp(), targetInstanceInfo.getPort(), defaultTimeout, defaultTimeout);
175221
//逐个slot迁移
176222
boolean hasError = false;
177-
for (int slot = startSlot; slot <= endSlot; slot++) {
223+
for (int slot = migratingSlot; slot <= endSlot; slot++) {
178224
long slotStartTime = System.currentTimeMillis();
179225
try {
180226
instanceReshardProcessDao.updateMigratingSlot(instanceReshardProcess.getId(), slot);
181227
//num是迁移key的总数
182-
int num = migrateSlotData(appId, sourceJedis, targetJedis, slot, isPipelineMigrate);
228+
int num = migrateSlotData(appId, sourceJedis, targetJedis, slot, isPipeline);
183229
instanceReshardProcessDao.increaseFinishSlotNum(instanceReshardProcess.getId());
184230
logger.warn("clusterReshard:{}->{}, slot={}, keys={}, costTime={} ms", sourceInstanceInfo.getHostPort(),
185231
targetInstanceInfo.getHostPort(), slot, num, (System.currentTimeMillis() - slotStartTime));
@@ -191,7 +237,7 @@ public boolean migrateSlot(long appId, long appAuditId, InstanceInfo sourceInsta
191237
}
192238
long endTime = System.currentTimeMillis();
193239
logger.warn("clusterReshard:{}->{}, slot:{}->{}, costTime={} ms", sourceInstanceInfo.getHostPort(),
194-
targetInstanceInfo.getHostPort(), startSlot, endSlot, (endTime - startTime));
240+
targetInstanceInfo.getHostPort(), migratingSlot, endSlot, (endTime - startTime));
195241
if (hasError) {
196242
instanceReshardProcessDao.updateStatus(instanceReshardProcess.getId(), ReshardStatusEnum.ERROR.getValue());
197243
return false;
@@ -202,45 +248,11 @@ public boolean migrateSlot(long appId, long appAuditId, InstanceInfo sourceInsta
202248
}
203249
}
204250

205-
206-
/**
207-
* 保存进度
208-
* @param appId
209-
* @param appAuditId
210-
* @param sourceInstanceInfo
211-
* @param targetInstanceInfo
212-
* @param startSlot
213-
* @param endSlot
214-
* @return
215-
*/
216-
private InstanceReshardProcess saveInstanceReshardProcess(long appId, long appAuditId,
217-
InstanceInfo sourceInstanceInfo, InstanceInfo targetInstanceInfo, int startSlot, int endSlot) {
218-
Date now = new Date();
219-
InstanceReshardProcess instanceReshardProcess = new InstanceReshardProcess();
220-
instanceReshardProcess.setAppId(appId);
221-
instanceReshardProcess.setAuditId(appAuditId);
222-
instanceReshardProcess.setFinishSlotNum(0);
223-
instanceReshardProcess.setSourceInstanceId(sourceInstanceInfo.getId());
224-
instanceReshardProcess.setTargetInstanceId(targetInstanceInfo.getId());
225-
instanceReshardProcess.setMigratingSlot(startSlot);
226-
instanceReshardProcess.setStartSlot(startSlot);
227-
instanceReshardProcess.setEndSlot(endSlot);
228-
instanceReshardProcess.setStatus(ReshardStatusEnum.RUNNING.getValue());
229-
instanceReshardProcess.setStartTime(now);
230-
//用status控制显示结束时间
231-
instanceReshardProcess.setEndTime(now);
232-
instanceReshardProcess.setCreateTime(now);
233-
instanceReshardProcess.setUpdateTime(now);
234-
235-
instanceReshardProcessDao.save(instanceReshardProcess);
236-
return instanceReshardProcess;
237-
}
238-
239251
/**
240252
* 迁移slot数据,并稳定slot配置
241253
* @throws Exception
242254
*/
243-
private int moveSlotData(final long appId, final Jedis source, final Jedis target, final int slot, boolean isPipelineMigrate) throws Exception {
255+
private int moveSlotData(final long appId, final Jedis source, final Jedis target, final int slot, int isPipeline) throws Exception {
244256
int num = 0;
245257
while (true) {
246258
final Set<String> keys = new HashSet<String>();
@@ -322,7 +334,7 @@ public boolean execute() {
322334
* MIGRATE host port key destination-db timeout [COPY] [REPLACE]
323335
* CLUSTER SETSLOT <slot> NODE <node_id> 将槽 slot 指派给 node_id 指定的节点,如果槽已经指派给另一个节点,那么先让另一个节点删除该槽>,然后再进行指派。
324336
*/
325-
private int migrateSlotData(long appId, final Jedis source, final Jedis target, final int slot, boolean isPipelineMigrate) {
337+
private int migrateSlotData(long appId, final Jedis source, final Jedis target, final int slot, int isPipeline) {
326338
int num = 0;
327339
final String sourceNodeId = getNodeId(appId, source);
328340
final String targetNodeId = getNodeId(appId, target);
@@ -357,7 +369,7 @@ public boolean execute() {
357369
}
358370

359371
try {
360-
num = moveSlotData(appId, source, target, slot, isPipelineMigrate);
372+
num = moveSlotData(appId, source, target, slot, isPipeline);
361373
} catch (Exception e) {
362374
isError = true;
363375
logger.error(e.getMessage(), e);

cachecloud-open-web/src/main/java/com/sohu/cache/stats/app/AppDeployCenter.java

+7
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ public HorizontalResult checkHorizontal(long appId, long appAuditId, long source
108108
public HorizontalResult startHorizontal(long appId, long appAuditId, long sourceId, long targetId, int startSlot,
109109
int endSlot, int migrateType);
110110

111+
/**
112+
* Retries a horizontal scale-out.
113+
* @param instanceReshardProcessId id of the reshard process to retry (presumably the id of an InstanceReshardProcess record — TODO confirm)
114+
* @return outcome of the retry attempt (HorizontalResult semantics are not visible here)
115+
*/
116+
public HorizontalResult retryHorizontal(final int instanceReshardProcessId);
117+
111118
/**
112119
* 添加cluster一个主(从)节点
113120
*

0 commit comments

Comments
 (0)