Skip to content

Commit 34b791b

Browse files
author
Carter Hollman
committed
First working implementation of handleSearch, works with Indeed
1 parent 9cb53b0 commit 34b791b

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ ext {
3030
}
3131

3232
dependencies {
33+
implementation 'org.seleniumhq.selenium:selenium-java:4.25.0'
3334
implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
3435
implementation 'org.springframework.boot:spring-boot-starter-web'
3536
developmentOnly 'org.springframework.boot:spring-boot-docker-compose'
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package org.codedevils.scraper.cd_scraper.search;
2+
3+
import org.openqa.selenium.By;
4+
import org.openqa.selenium.WebElement;
5+
import org.openqa.selenium.WebDriver;
6+
import org.openqa.selenium.chrome.ChromeDriver;
7+
import java.time.Duration;
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
11+
public class handleSearch{
12+
public static ArrayList<String> handleSearch(String link, String criteria){
13+
//New WebDriver with implicit wait of 5 seconds if elements aren't found on page
14+
WebDriver driver = new ChromeDriver();
15+
driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(5));
16+
17+
//Setup page and declare search variable cuz scope and stuff
18+
driver.get(link);
19+
WebElement searchField;
20+
21+
22+
//Try to find the search field via XPATH, to add functionality for more websites can add their specific XPATH to
23+
//try block via concatenation of the following string " | <xpath>"
24+
try{
25+
searchField = driver.findElement(By.xpath(
26+
"//input[@type='text' or @type='search' or @placeholder[contains(., 'Search')]]"
27+
));
28+
}catch(Exception e){
29+
return null;
30+
}
31+
32+
//Since we found a search field, clear it, input criteria, submit to navigate to next page
33+
searchField.clear();
34+
searchField.sendKeys(criteria);
35+
searchField.submit();
36+
37+
38+
39+
//Should now be on page where we can scrape for the links
40+
ArrayList<String> links = new ArrayList<>();
41+
42+
43+
//Loop is for changing pages if necessary
44+
boolean next = true;
45+
while(next) {
46+
47+
//If we don't find a next button then we break the loop of changing pages
48+
WebElement nextButton = null;
49+
try{
50+
nextButton = driver.findElement(By.xpath("//a[@data-testid='pagination-page-next']"));
51+
}catch(Exception e){
52+
next = false;
53+
}
54+
55+
56+
57+
//Isolate individual result elements into a list to iterate over
58+
List<WebElement> results = driver.findElements(By.xpath("//li[@class='css-1ac2h1w eu4oa1w0']"));
59+
60+
//For every individual result, find its anchor tag and extract the href attribute.
61+
//That attribute holds the link we are looking for, so we add to outgoing list :)
62+
//NOTE: Some elements in the results do not have link or are filler try/catch ignores them
63+
for (WebElement result : results) {
64+
try {
65+
String temp = result.findElement(By.tagName("a")).getAttribute("href");
66+
links.add(temp);
67+
} catch (Exception ignored) {
68+
69+
}
70+
}
71+
72+
//Continue through the pages :)
73+
if(nextButton != null)
74+
driver.navigate().to(nextButton.getAttribute("href"));
75+
}
76+
77+
//All done, success!!!!!
78+
driver.quit();
79+
return links;
80+
}
81+
}
82+

0 commit comments

Comments
 (0)