Skip to content

Commit 19fa585

Browse files
committed
Break down method goal into small pieces for code simplicity
1 parent 971ca5b commit 19fa585

File tree

1 file changed

+26
-9
lines changed

1 file changed

+26
-9
lines changed

webmagic-core/src/main/java/us/codecraft/webmagic/selector/SmartContentSelector.java

+26-9
Original file line numberDiff line numberDiff line change
@@ -51,22 +51,21 @@ public String select(String html) {
5151
text.setLength(0);
5252

5353
for (int i = 0; i < indexDistribution.size() - 1; i++) {
54-
if (indexDistribution.get(i) > threshold && ! boolstart) {
55-
if (indexDistribution.get(i+1).intValue() != 0
56-
|| indexDistribution.get(i+2).intValue() != 0
57-
|| indexDistribution.get(i+3).intValue() != 0) {
54+
if (indexDistribution.get(i) > threshold && ! boolstart
55+
&& !isAnyIndexDistributionZero(indexDistribution,i+1,i+2,i+3)){
5856
boolstart = true;
5957
start = i;
6058
continue;
6159
}
6260
}
63-
if (boolstart) {
64-
if (indexDistribution.get(i).intValue() == 0
65-
|| indexDistribution.get(i+1).intValue() == 0) {
61+
if (boolstart && isAnyIndexDistributionZero (indexDistribution,i,i+1)) {
62+
6663
end = i;
6764
boolend = true;
68-
}
65+
6966
}
67+
68+
7069
StringBuilder tmp = new StringBuilder();
7170
if (boolend) {
7271
//System.out.println(start+1 + "\t\t" + end+1);
@@ -83,9 +82,27 @@ public String select(String html) {
8382
}
8483
return text.toString();
8584
}
86-
85+
86+
8787
/**
 * List-based selection is not supported by this selector.
 *
 * @param text input text; it is never read
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
8888
public List<String> selectList(String text) {
8989
throw new UnsupportedOperationException();
9090
}
91+
92+
private static boolean isAnyIndexDistributionZero( ArrayList <Integer> indexDistribution, int index, int successorIndex, int afterSuccessorIndex = null) {
93+
94+
95+
if (afterSuccessorIndex != null) {
96+
return (indexDistribution.get(index).intValue() == 0
97+
&& indexDistribution.get(indexSuccessor).intValue() == 0
98+
&& indexDistribution.get(afterSuccessorIndex).intValue() == 0 );
99+
}else {
100+
return (indexDistribution.get(index).intValue() == 0
101+
|| indexDistribution.get(indexSuccessor).intValue() == 0);
102+
}
103+
104+
}
105+
106+
107+
91108
}

0 commit comments

Comments
 (0)