From e5b4049a11f87c98ae7d5c977422fb796e4d3f6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?I=C3=B1aki=20San=20Vicente?= Date: Thu, 16 Nov 2023 00:44:03 +0100 Subject: [PATCH] updated selenium code --- pom.xml | 16 ++++-- src/main/java/elh/eus/MSM/Feed.java | 2 +- src/main/java/elh/eus/MSM/FeedCredential.java | 12 ++++- src/main/java/elh/eus/MSM/FeedReader.java | 52 +++++++++++++------ 4 files changed, 60 insertions(+), 22 deletions(-) diff --git a/pom.xml b/pom.xml index 9740316..3109853 100644 --- a/pom.xml +++ b/pom.xml @@ -59,7 +59,7 @@ org.apache.httpcomponents httpclient - 4.5.13 + 4.5.14 jaxen @@ -156,22 +156,27 @@ org.seleniumhq.selenium selenium-java - 4.3.0 + 4.7.0 org.seleniumhq.selenium selenium-api - 4.3.0 + 4.7.0 org.seleniumhq.selenium selenium-chrome-driver - 4.3.0 + 4.7.0 org.seleniumhq.selenium selenium-support - 4.3.0 + 4.7.0 + + + org.seleniumhq.selenium + selenium-http-jdk-client + 4.7.0 com.google.guava @@ -264,6 +269,7 @@ elh.eus.MSM.CLI + diff --git a/src/main/java/elh/eus/MSM/Feed.java b/src/main/java/elh/eus/MSM/Feed.java index 547732a..c1882d7 100644 --- a/src/main/java/elh/eus/MSM/Feed.java +++ b/src/main/java/elh/eus/MSM/Feed.java @@ -215,7 +215,7 @@ public static Set retrieveFromDB(Connection conn, String type, String tabl if (rs.getString("f.login_url") != null) { FeedCredential cred = new FeedCredential(rs.getString("f.url"), rs.getString("f.login_url"), rs.getString("f.login_username"), - rs.getString("f.login_passwd"), rs.getString("f.login_usr_field"),rs.getString("f.login_passwd_field"), + rs.getString("f.login_passwd"), rs.getString("f.login_usr_field"),rs.getString("f.login_passwd_field"),rs.getString("f.logged_check_field"), rs.getString("f.login_cookie_button")); src.setLoginCredentials(cred); diff --git a/src/main/java/elh/eus/MSM/FeedCredential.java b/src/main/java/elh/eus/MSM/FeedCredential.java index 1d31046..8d7706e 100644 --- a/src/main/java/elh/eus/MSM/FeedCredential.java +++ b/src/main/java/elh/eus/MSM/FeedCredential.java 
@@ -8,15 +8,17 @@ public class FeedCredential { private String ssopass; private String userField; private String passField; + private String loggedCheckField; private String cookieNotice; - public FeedCredential(String domain, String ssourl, String ssouser, String ssopass, String userField, String passField, String cookieNotice) { + public FeedCredential(String domain, String ssourl, String ssouser, String ssopass, String userField, String passField, String loggedFieldCheck, String cookieNotice) { this.setDomain(domain); this.setSsourl(ssourl); this.setSsouser(ssouser); this.setSsopass(ssopass); this.setUserField(userField); this.setPassField(passField); + this.setLoggedCheckField(loggedFieldCheck); this.setCookieNotice(cookieNotice); } @@ -68,6 +70,14 @@ public void setPassField(String passField) { this.passField = passField; } + public String getLoggedCheckField() { + return loggedCheckField; + } + + public void setLoggedCheckField(String loggedField) { + this.loggedCheckField = loggedField; + } + public String getCookieNotice() { return cookieNotice; } diff --git a/src/main/java/elh/eus/MSM/FeedReader.java b/src/main/java/elh/eus/MSM/FeedReader.java index a04bbf7..9c1f061 100644 --- a/src/main/java/elh/eus/MSM/FeedReader.java +++ b/src/main/java/elh/eus/MSM/FeedReader.java @@ -188,8 +188,8 @@ public void setFeeds(Set flist){ this.feeds=flist; } - public void addCredential(String domain, String ssourl, String ssouser, String ssopass, String userField, String passField, String cookieNotice) { - this.credentials.put(domain, new FeedCredential(domain,ssourl,ssouser,ssopass,userField,passField,cookieNotice)); + public void addCredential(String domain, String ssourl, String ssouser, String ssopass, String userField, String passField, String loggedCheckField, String cookieNotice) { + this.credentials.put(domain, new FeedCredential(domain,ssourl,ssouser,ssopass,userField,passField,loggedCheckField,cookieNotice)); } /** @@ -371,7 +371,11 @@ public void 
processFeeds(String store, String type, String ffmpeg) case "press": for (Feed f : getFeeds()) { + try{ getRssFeed(f, store); + }catch (Exception e){ + System.err.println("FeadReader::processFeeds -> Feed processed with errors: (" +f.getFeedURL()+") :\n "+e.getMessage()); + } } break; case "multimedia": @@ -566,13 +570,19 @@ private void getRssFeed (Feed f, String store){ // parse the document into boilerpipe's internal data structure //final InputSource is = HTMLFetcher.fetch(linkSrc).toInputSource(); String docXml = ""; - if (subscription) { + + try{ + if (subscription) { seleniumDriver.get(link); docXml = seleniumDriver.getPageSource(); - } - else { + } + else { docXml = fetchHTML(linkSrc,cookieStore); + } + }catch (TimeoutException tel){ + System.err.println("FeadReader::getRssFeed -> (Selenium) timeout when trying to get link: "+link+" \n "+tel.getMessage()); + continue; } final HTMLDocument is=new HTMLDocument(docXml); @@ -636,15 +646,17 @@ private void getRssFeed (Feed f, String store){ } catch (AuthenticationException ae) { System.err.println("FeadReader::getRssFeed -> HTTP client authentication error whe reading a feed entry (" +link+") :\n "+ae.getMessage()); ae.printStackTrace(); - } catch (TimeoutException te3){ - System.err.println("FeadReader::getRssFeed -> selenium timeout when trying to get link: "+link+" \n "+te3.getMessage()); } } System.err.println("FeadReader::getRssFeed -> found "+newEnts+" new entries "); // terminates driver session and closes all windows - if (subscription){ - seleniumDriver.quit(); + if (subscription){ + try { + seleniumDriver.quit(); + }catch (Exception e){ + System.err.println("FeadReader::getRssFeed -> Failed to close selenium session properly, continuing anyways. 
Feed entry (" +link+") :\n "+e.getMessage()); + } } try { @@ -1088,6 +1100,7 @@ private void loadCredentials(String property) { String login_pass = rs.getString("login_passwd"); String login_user_field = rs.getString("login_usr_field"); String login_pass_field = rs.getString("login_passwd_field"); + String logged_check_field = rs.getString("logged_check_field"); String login_cookie_button = rs.getString("login_cookie_button"); String query2 = "SELECT domain FROM " @@ -1102,7 +1115,7 @@ private void loadCredentials(String property) { } if (! fdomain.equalsIgnoreCase("")) { // domain, String ssourl, String ssouser, String ssopass, String userField, String passField, String cookieNotice - addCredential(fdomain, login_url, login_user, login_pass, login_user_field,login_pass_field,login_cookie_button); + addCredential(fdomain, login_url, login_user, login_pass, login_user_field,login_pass_field,logged_check_field, login_cookie_button); } st2.close(); } @@ -1121,10 +1134,10 @@ private void loadCredentials(String property) { for (String cred : allCredentials) { String[] split = cred.split("::"); - if (split.length < 7) { - System.err.println("MSM::FeedReader - Invalid credential, credential string format must be as follows: domain::ssourl::ssouser:ssopass::userfield::passfield::cookienotice ->"+split.length+" "+split[0]); + if (split.length < 8) { + System.err.println("MSM::FeedReader - Invalid credential, credential string format must be as follows: domain::ssourl::ssouser::ssopass::userfield::passfield::loggedCheckField::cookienotice ->"+split.length+" "+split[0]); } else { - addCredential(split[0], split[1], split[2], split[3],split[4],split[5],split[6]); + addCredential(split[0], split[1], split[2], split[3],split[4],split[5],split[6],split[7]); } } } @@ -1255,6 +1268,7 @@ private String fetchHTML(URL linkSrc, CookieStore cst) throws IOException, URISy boolean startSelenium(FeedCredential cred) { System.setProperty("webdriver.chrome.driver",params.getProperty("chromedriverPath", "chromedriver")); 
+ System.setProperty("webdriver.http.factory", "jdk-http-client"); //System.setProperty("webdriver.chrome.bin", "/usr/bin/google-chrome-beta"); ChromeOptions seleniumOptions = new ChromeOptions(); String seleniumOpts=params.getProperty("seleniumOptions",""); @@ -1263,8 +1277,9 @@ boolean startSelenium(FeedCredential cred) seleniumOptions.addArguments(o); } } - seleniumOptions.setBinary("/usr/bin/google-chrome-beta"); - + + seleniumOptions.setBinary(params.getProperty("chromePath", "/usr/bin/google-chrome-beta")); + seleniumDriver=new ChromeDriver(seleniumOptions); try { @@ -1280,7 +1295,7 @@ boolean startSelenium(FeedCredential cred) } } - WebDriverWait wait = new WebDriverWait(seleniumDriver, Duration.ofSeconds(30)); + WebDriverWait wait = new WebDriverWait(seleniumDriver, Duration.ofSeconds(60)); // if there is a cookie accepting notice wait until is ready and click to accept if (! cred.getCookieNotice().equalsIgnoreCase("none")) { try { @@ -1306,11 +1321,18 @@ boolean startSelenium(FeedCredential cred) seleniumDriver.findElement(By.id(cred.getUserField())).sendKeys(cred.getSsouser()); //pass seleniumDriver.findElement(By.id(cred.getPassField())).sendKeys(cred.getSsopass() + Keys.ENTER); + }catch (ElementNotInteractableException nie){ System.err.println("FeadReader::getRssFeed -> selenium found an element not clickable, proceeding without login"); return false; } + try { + wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath(cred.getLoggedCheckField()))); + }catch (ElementNotInteractableException | org.openqa.selenium.TimeoutException e){ + System.err.println("FeadReader::startSelenium -> element indicating succesfull login not found, proceeding anyway"); + } + return true; }