From 597a7b3903b7c7cd99f2c826a0c8151c347a8816 Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Thu, 13 Mar 2014 20:26:23 -0700
Subject: [PATCH] Xhamster support

---
 .../ripme/ripper/rippers/XhamsterRipper.java  | 125 +++++++++++++++++++
 .../ripper/rippers/XhamsterRipperTest.java    |  38 ++++++
 2 files changed, 163 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/XhamsterRipperTest.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java
new file mode 100644
index 000000000..e692e5a2b
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java
@@ -0,0 +1,125 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractRipper;
+
+/**
+ * Ripper for xhamster.com photo galleries: fetches each gallery page and
+ * queues a rewritten URL for every thumbnail found on it.
+ */
+public class XhamsterRipper extends AbstractRipper {
+
+    private static final String DOMAIN = "xhamster.com";
+    private static final String HOST = "xhamster";
+    private static final Logger logger = Logger.getLogger(XhamsterRipper.class);
+
+    /** Gallery URL shape; group 2 captures the numeric gallery id. */
+    private static final Pattern GALLERY_PATTERN = Pattern.compile(
+            "^https?://([a-z0-9.]*?)xhamster\\.com/photos/gallery/([0-9]{1,})/.*\\.html");
+    /** Thumbnail host prefix, rewritten to "http://up." before download. */
+    private static final Pattern THUMB_HOST = Pattern.compile("http://p[0-9]*\\.");
+    /** Thumbnail size marker, rewritten to "_1000." before download. */
+    private static final Pattern THUMB_SIZE = Pattern.compile("_160\\.");
+
+    public XhamsterRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    /**
+     * Accepts xhamster.com and its subdomains only; a bare suffix match would
+     * also accept unrelated hosts such as "notxhamster.com".
+     */
+    @Override
+    public boolean canRip(URL url) {
+        String host = url.getHost();
+        return host.equals(DOMAIN) || host.endsWith("." + DOMAIN);
+    }
+
+    /** Returns the URL unchanged. */
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        return url;
+    }
+
+    /**
+     * Walks the gallery page by page, queues every thumbnail's rewritten URL
+     * for download, then waits for the download threads.
+     *
+     * @throws IOException if a gallery page cannot be fetched
+     */
+    @Override
+    public void rip() throws IOException {
+        int index = 0;
+        // Pages already fetched; ends the loop if pagination links back on itself.
+        Set<String> visited = new HashSet<String>();
+        String nextURL = this.url.toExternalForm();
+        while (nextURL != null && visited.add(nextURL)) {
+            logger.info(" Retrieving " + nextURL);
+            Document doc = Jsoup.connect(nextURL).get();
+            for (Element thumb : doc.select("table.iListing div.img img")) {
+                if (!thumb.hasAttr("src")) {
+                    continue;
+                }
+                // absUrl resolves relative sources and yields "" for unusable ones.
+                String image = thumb.absUrl("src");
+                if (image.length() == 0) {
+                    continue;
+                }
+                image = THUMB_HOST.matcher(image).replaceAll("http://up.");
+                image = THUMB_SIZE.matcher(image).replaceAll("_1000.");
+                index += 1;
+                addURLToDownload(new URL(image), String.format("%03d_", index));
+            }
+            nextURL = nextPageURL(doc);
+        }
+        waitForThreads();
+    }
+
+    /**
+     * Returns the absolute URL of the first "a.last" link on the page, or
+     * null when there is none or it cannot be resolved.
+     */
+    private static String nextPageURL(Document doc) {
+        Element link = doc.select("a.last").first();
+        if (link == null) {
+            return null;
+        }
+        String href = link.absUrl("href");
+        return href.length() == 0 ? null : href;
+    }
+
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    /**
+     * Extracts the numeric gallery id from a gallery URL.
+     *
+     * @throws MalformedURLException if the URL is not a gallery page
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Matcher m = GALLERY_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(2);
+        }
+        throw new MalformedURLException(
+                "Expected xhamster.com gallery formats: "
+                        + "xhamster.com/photos/gallery/#####/xxxxx.html"
+                        + " Got: " + url);
+    }
+
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XhamsterRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XhamsterRipperTest.java
new file mode 100644
index 000000000..b5443a6ab
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XhamsterRipperTest.java
@@ -0,0 +1,38 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.rarchives.ripme.ripper.rippers.XhamsterRipper;
+
+public class XhamsterRipperTest extends RippersTest {
+
+    /** Rips two known galleries and checks that more than one file was downloaded. */
+    public void testXhamsterAlbums() throws IOException {
+        if (!DOWNLOAD_CONTENT) {
+            return;
+        }
+        List<URL> contentURLs = new ArrayList<URL>();
+        contentURLs.add(new URL("http://xhamster.com/photos/gallery/1462237/alyssa_gadson.html"));
+        contentURLs.add(new URL("http://xhamster.com/photos/gallery/2941201/tableau_d_039_art_ii.html"));
+        for (URL url : contentURLs) {
+            try {
+                XhamsterRipper ripper = new XhamsterRipper(url);
+                ripper.rip();
+                // listFiles() returns null when the directory is missing.
+                File[] files = ripper.getWorkingDir().listFiles();
+                // assertTrue, not the assert keyword, which is a no-op without -ea.
+                assertTrue("Expected more than one file for " + url,
+                        files != null && files.length > 1);
+                deleteDir(ripper.getWorkingDir());
+            } catch (Exception e) {
+                e.printStackTrace();
+                fail("Error while ripping URL " + url + ": " + e.getMessage());
+            }
+        }
+    }
+
+}