-
Notifications
You must be signed in to change notification settings - Fork 182
/
Copy pathScraper.java
89 lines (84 loc) · 4.31 KB
/
Scraper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import com.opencsv.CSVWriter;
import io.github.bonigarcia.wdm.WebDriverManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.*;
import org.openqa.selenium.edge.EdgeDriver;
import java.io.FileWriter;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
/**
 * Logs into LeetCode with a Selenium-driven Edge browser, collects the company links found on
 * the problem-set page, and writes one CSV file per company into the working directory.
 *
 * <p>Each CSV has the columns ID, Title, URL, Is Premium, Acceptance %, Difficulty and
 * Frequency %. Page loads are awaited with fixed sleeps rather than explicit waits.
 */
public class Scraper {
    private static final String USERNAME = ""; // Provide your LeetCode username
    private static final String PASSWORD = ""; // Provide your LeetCode password
    public static final int QUESTIONS_PAGE_WAIT_MILLIS = 25000;
    public static final int LOGIN_PAGE_WAIT_MILLIS = 2000;

    /** Pause used before and after reloading a company page whose table was not found. */
    private static final int RETRY_WAIT_MILLIS = 30000;

    /** A data row is read up to column index 5, so it needs at least this many cells. */
    private static final int MIN_COLUMNS = 6;

    WebDriver driver;
    List<String> companyURLs = new ArrayList<>();

    /**
     * Runs the whole scrape: logs in, gathers the company URLs and exports each company to CSV.
     * The browser is always closed before this method returns or throws.
     *
     * @throws IllegalArgumentException if {@code USERNAME} or {@code PASSWORD} is empty
     * @throws InterruptedException     if the thread is interrupted during one of the fixed waits
     * @throws IOException              if a CSV file cannot be written
     */
    public void setup() throws InterruptedException, IOException {
        // Validate before launching anything so a missing credential does not leave a browser open.
        if (USERNAME.isEmpty() || PASSWORD.isEmpty()) {
            throw new IllegalArgumentException("Username or password can't be empty");
        }

        WebDriverManager.edgedriver().setup();
        driver = new EdgeDriver();
        try {
            driver.get("https://leetcode.com/accounts/login/");
            driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(1));
            login();

            driver.get("https://leetcode.com/problemset/all/");
            collectCompanyUrls();

            for (String companyURL : companyURLs) {
                visitCompanies(companyURL);
            }
        } finally {
            // Release the browser and driver process on success and on failure alike.
            driver.quit();
        }
    }

    /** Fills in the login form, submits it with ENTER and waits a fixed time for the login. */
    private void login() throws InterruptedException {
        driver.findElement(By.id("id_login")).sendKeys(USERNAME);
        WebElement passwordField = driver.findElement(By.id("id_password"));
        passwordField.sendKeys(PASSWORD);
        passwordField.sendKeys(Keys.ENTER);
        Thread.sleep(LOGIN_PAGE_WAIT_MILLIS); // Wait for the login to happen, then visit the problems pages
    }

    /**
     * Replaces the contents of {@code companyURLs} with the {@code href} of every element on the
     * current page matching {@code .mb-4.mr-3} (presumably the company tags; verify against the
     * live page markup). Elements without an {@code href} are skipped.
     */
    private void collectCompanyUrls() {
        companyURLs.clear(); // a repeated setup() call must not scrape duplicates
        for (WebElement company : driver.findElements(By.cssSelector(".mb-4.mr-3"))) {
            String link = company.getAttribute("href");
            if (link == null || link.isEmpty()) {
                continue; // not a link; would otherwise fail later when deriving the file name
            }
            System.out.println(link);
            companyURLs.add(link);
        }
    }

    /**
     * Loads one company page, parses its problem table and writes it to
     * {@code <companyName>.csv}, where the name is the last path segment of the URL.
     *
     * @param companyURL absolute URL of the company page
     * @throws InterruptedException if the thread is interrupted during one of the fixed waits
     * @throws IOException          if the CSV file cannot be written
     */
    private void visitCompanies(String companyURL) throws InterruptedException, IOException {
        String companyName = companyNameFrom(companyURL);
        System.out.println("Visiting " + companyURL);

        String table = fetchTableHtml(companyURL);
        List<String[]> result = parseRows(table);

        try (CSVWriter csvWriter = new CSVWriter(new FileWriter(companyName + ".csv"))) {
            csvWriter.writeAll(result);
        }
    }

    /**
     * Returns the last non-empty path segment of the URL. Trailing slashes are ignored so that
     * {@code .../company/google/} yields {@code google} instead of an empty name.
     */
    private static String companyNameFrom(String companyURL) {
        String trimmed = companyURL;
        while (trimmed.endsWith("/")) {
            trimmed = trimmed.substring(0, trimmed.length() - 1);
        }
        return trimmed.substring(trimmed.lastIndexOf('/') + 1);
    }

    /**
     * Opens the company page and returns the inner HTML of its {@code table}-classed element
     * wrapped in {@code <table>} tags. If the element is missing after the first wait, the page
     * is reloaded once with longer waits; a second miss propagates Selenium's
     * {@code NoSuchElementException}.
     */
    private String fetchTableHtml(String companyURL) throws InterruptedException {
        driver.get(companyURL);
        Thread.sleep(QUESTIONS_PAGE_WAIT_MILLIS); // Wait for the page to load, for companies like Google/Amazon, it takes a lot of time
        try {
            return readTableHtml();
        } catch (NoSuchElementException ex) {
            Thread.sleep(RETRY_WAIT_MILLIS);
            driver.get(companyURL);
            Thread.sleep(RETRY_WAIT_MILLIS);
            return readTableHtml();
        }
    }

    /** Reads the current page's {@code table}-classed element as a standalone table fragment. */
    private String readTableHtml() {
        return "<table>" + driver.findElement(By.className("table")).getAttribute("innerHTML") + "</table>";
    }

    /**
     * Converts the table HTML into CSV rows, header row first. Rows with fewer than
     * {@code MIN_COLUMNS} {@code <td>} cells (including header rows, which have none) are
     * skipped; a title cell without a link yields an empty URL.
     */
    private static List<String[]> parseRows(String tableHtml) {
        Document doc = Jsoup.parse(tableHtml); // parse the table html content
        List<String[]> rows = new ArrayList<>();
        rows.add(new String[]{"ID", "Title", "URL", "Is Premium", "Acceptance %", "Difficulty", "Frequency %"});

        for (Element row : doc.getElementsByTag("tr")) {
            Elements cols = row.getElementsByTag("td");
            if (cols.size() < MIN_COLUMNS) {
                continue;
            }
            Element titleCell = cols.get(2);
            Elements links = titleCell.getElementsByAttribute("href");

            String id = cols.get(1).text();
            String title = titleCell.text();
            String problemUrl = links.isEmpty() ? "" : links.get(0).attr("href");
            // The original author treats an <i> element in the title cell as the premium marker.
            boolean isPremium = !titleCell.getElementsByTag("i").isEmpty();
            String acceptance = cols.get(3).text();
            String difficulty = cols.get(4).getElementsByTag("span").text();
            String frequency = cols.get(5).getElementsByClass("progress-bar").attr("style");
            // sample response "width: 29.1345%";
            frequency = frequency.substring(frequency.indexOf(" ") + 1); // get the value after the first whitespace

            rows.add(new String[]{id, title, problemUrl, isPremium ? "Y" : "N", acceptance, difficulty, frequency});
        }
        return rows;
    }
}