diff --git a/src/main/java/lan/dk/podcastserver/manager/worker/selector/update/BeInSportUpdaterCompatibility.java b/src/main/java/lan/dk/podcastserver/manager/worker/selector/update/BeInSportUpdaterCompatibility.java index 9c0745633..4df9c4396 100644 --- a/src/main/java/lan/dk/podcastserver/manager/worker/selector/update/BeInSportUpdaterCompatibility.java +++ b/src/main/java/lan/dk/podcastserver/manager/worker/selector/update/BeInSportUpdaterCompatibility.java @@ -10,7 +10,7 @@ public class BeInSportUpdaterCompatibility implements UpdaterCompatibility { @Override public Integer compatibility(String url) { - return url != null && url.contains("beinsports.fr") + return url != null && url.contains("beinsports.com") ? 1 : Integer.MAX_VALUE; } diff --git a/src/main/java/lan/dk/podcastserver/manager/worker/updater/BeInSportsUpdater.java b/src/main/java/lan/dk/podcastserver/manager/worker/updater/BeInSportsUpdater.java index 23c3035da..4af6e7a06 100644 --- a/src/main/java/lan/dk/podcastserver/manager/worker/updater/BeInSportsUpdater.java +++ b/src/main/java/lan/dk/podcastserver/manager/worker/updater/BeInSportsUpdater.java @@ -31,13 +31,11 @@ public class BeInSportsUpdater extends AbstractUpdater { /* Patter to extract value from URL */ - private static final String ATTRIBUTE_EXTRACTOR_FROM_JAVASCRIPT_VALUE = ".*\"%s\": \"([^\"]*)\".*"; + private static final String ATTRIBUTE_EXTRACTOR_FROM_JAVASCRIPT_VALUE = ".*\"%s\":\"([^\"]*)\".*"; private static final String PARAMETER_SEPARATOR = "?"; - private static final String EPISODE_LISTING_URL = "http://www.beinsports.fr/ajax/filter-videos/siteSection/replay/filterBySelect/%s/ajaxSection/integrales"; - private static final String VIDEO_ARTICLE_URL_FORMAT = "http://www.beinsports.fr/ajax/swap-video/article/%s"; - private static final Pattern DATE_PATTERN = Pattern.compile(".*[(]([^)]*)[)].*"); - private static final Pattern STREAM_HLS_URL_EXTRACTOR_PATTERN1 = Pattern.compile(String.format(ATTRIBUTE_EXTRACTOR_FROM_JAVASCRIPT_VALUE, "stream_hls_url")); - private static final Pattern THUMB_NAIL_EXTRACTOR_PATTERN = Pattern.compile(String.format(ATTRIBUTE_EXTRACTOR_FROM_JAVASCRIPT_VALUE, "thumbnail_large_url")); + private static final Pattern STREAM_720_URL_EXTRACTOR_PATTERN1 = Pattern.compile(String.format(ATTRIBUTE_EXTRACTOR_FROM_JAVASCRIPT_VALUE, "url")); + private static final Pattern POSTER_URL_EXTRACTOR_PATTERN = Pattern.compile(String.format(ATTRIBUTE_EXTRACTOR_FROM_JAVASCRIPT_VALUE, "poster_url")); + private static final String beInSportsDomain = "http://www.beinsports.com/%s"; @Resource JdomService jdomService; @Resource HtmlService htmlService; @@ -46,11 +44,10 @@ public class BeInSportsUpdater extends AbstractUpdater { public Set getItems(Podcast podcast) { Document page; Set itemSet = new HashSet<>(); - String listingUrl = getListingUrl(podcast); + String listingUrl = podcast.getUrl(); try { - Connection.Response response = htmlService.connectWithDefault(listingUrl) - .execute(); + Connection.Response response = htmlService.connectWithDefault(listingUrl).execute(); page = response.parse(); } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. @@ -60,39 +57,27 @@ public Set getItems(Podcast podcast) { for(Element article : page.select("article")) { Item item = new Item() - .setTitle(article.select("h4").first().text()) - .setDescription(article.select("h4").first().text()) - .setPubdate(getPubDateFromDescription(article.select("h4").first().text())); + .setTitle(article.select("h3").first().text()) + .setDescription(article.select("h3").first().text()) + .setPubdate(getPubDateFromDescription(article.select("time").first().attr("datetime"))); - item = getDetailOfItemByXML(item, StringUtils.substringAfterLast(article.select("a").first().attr("href"), "/")); + item = getDetailsByJavascript(item, String.format(beInSportsDomain, article.select("a").first().attr("data-url"))); itemSet.add(item); } return itemSet; } - private ZonedDateTime getPubDateFromDescription(String title) { - /* L'Expresso (26/02) - 1\u00e8re partie */ - Matcher m = DATE_PATTERN.matcher(title); - - if (!m.find()) { - return null; - } - - String dayMonth = m.group(1); - return ZonedDateTime - .now() - .withMonth(Integer.valueOf(StringUtils.substringAfter(dayMonth, "/"))) - .withDayOfMonth(Integer.valueOf(StringUtils.substringBefore(dayMonth, "/"))) - .withHour(8) - .withMinute(0); + private ZonedDateTime getPubDateFromDescription(String dateString) { + /* 2015-09-04T04:26:18+00:00 */ + return ZonedDateTime.parse(dateString); } - private Item getDetailOfItemByXML(Item item, String urlItemBeInSport) { + private Item getDetailsByJavascript(Item item, String urlItemBeInSport) { String javascriptCode; try { - Connection.Response response = htmlService.connectWithDefault(String.format(VIDEO_ARTICLE_URL_FORMAT, urlItemBeInSport)) + Connection.Response response = htmlService.connectWithDefault(urlItemBeInSport) .execute(); Document articlePage = response.parse(); String apiItemUrl = articlePage.select("iframe").attr("src"); @@ -101,20 +86,20 @@ private Item getDetailOfItemByXML(Item item, String urlItemBeInSport) { .execute(); javascriptCode = getJavascriptPart(response.parse().select("script")); } catch (IOException | IllegalArgumentException e) { - logger.error("Error during fetch of {}", String.format(VIDEO_ARTICLE_URL_FORMAT, urlItemBeInSport), e); + logger.error("Error during fetch of {}", urlItemBeInSport, e); return new Item(); } - Matcher matcher = STREAM_HLS_URL_EXTRACTOR_PATTERN1.matcher(javascriptCode); + Matcher matcher = STREAM_720_URL_EXTRACTOR_PATTERN1.matcher(javascriptCode); if (matcher.find()) { - item.setUrl(matcher.group(1)); + item.setUrl(matcher.group(1).replace("\\", "")); } - Matcher thumNailematcher = THUMB_NAIL_EXTRACTOR_PATTERN.matcher(javascriptCode); + Matcher thumNailematcher = POSTER_URL_EXTRACTOR_PATTERN.matcher(javascriptCode); if (thumNailematcher.find()) { try { - item.setCover(imageService.getCoverFromURL(thumNailematcher.group(1))); + item.setCover(imageService.getCoverFromURL(thumNailematcher.group(1).replace("\\", ""))); } catch (IOException e) { e.printStackTrace(); } @@ -126,27 +111,26 @@ private Item getDetailOfItemByXML(Item item, String urlItemBeInSport) { private String getJavascriptPart(Elements tagScripts) { return tagScripts.stream() .map(Element::data) - .filter(data -> data.contains("stream_hls_url")) + .filter(data -> data.contains("720")) .findFirst() .orElse(""); } @Override public String signatureOf(Podcast podcast) { - String listingUrl = getListingUrl(podcast); + String listingUrl = podcast.getUrl(); + /* cluster_video */ - if (!listingUrl.equals("")) { - return signatureService.generateSignatureFromURL(podcast.getUrl()); - } else { - return ""; + try { + Document page = htmlService.connectWithDefault(listingUrl).execute().parse(); + return signatureService.generateMD5Signature(page.select("cluster_video").html()); + } catch (IOException e) { + logger.error("IOException :", e); } - } - private String getListingUrl(Podcast podcast) { - String idShow = StringUtils.substringAfterLast(podcast.getUrl(), "/"); - return String.format(EPISODE_LISTING_URL, idShow); + return ""; } - + public Boolean podcastContains(Podcast podcast, Item item) { if (item.getUrl() == null) return false; diff --git a/src/test/integration/lan/dk/podcastserver/worker/BeInSportWorker.java b/src/test/integration/lan/dk/podcastserver/worker/BeInSportWorker.java index 21a2c5a32..243436fc1 100644 --- a/src/test/integration/lan/dk/podcastserver/worker/BeInSportWorker.java +++ b/src/test/integration/lan/dk/podcastserver/worker/BeInSportWorker.java @@ -1,48 +1,60 @@ package lan.dk.podcastserver.worker; +import lan.dk.podcastserver.context.ValidatorConfig; +import lan.dk.podcastserver.entity.Podcast; import lan.dk.podcastserver.manager.worker.updater.BeInSportsUpdater; +import lan.dk.podcastserver.service.*; import org.junit.Ignore; +import org.junit.Test; import org.junit.runner.RunWith; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import org.springframework.test.context.support.AnnotationConfigContextLoader; +import static org.assertj.core.api.Assertions.assertThat; + /** * Created by kevin on 22/02/2014. */ @RunWith(SpringJUnit4ClassRunner.class) -@ContextConfiguration(classes = {/*PropertyConfig.class*/}, loader=AnnotationConfigContextLoader.class) +@ContextConfiguration(classes = {ValidatorConfig.class, PodcastServerParameters.class, SignatureService.class, UrlService.class, JdomService.class, MimeTypeService.class, HtmlService.class, ImageService.class}, loader=AnnotationConfigContextLoader.class) @Ignore public class BeInSportWorker { private final Logger logger = LoggerFactory.getLogger(BeInSportWorker.class); - BeInSportsUpdater beInSportsUpdater = new BeInSportsUpdater(); + @Autowired BeInSportsUpdater beInSportsUpdater; -/* @Test public void signatureFeedExpresso() { - Podcast lexpresso = new Podcast("L'expresso", "http://www.beinsports.fr/replay/category/3361/name/lexpresso", - "", "BeInSport", new Timestamp(System.currentTimeMillis()), null, new Cover("http://www.beinsports.fr/di/library/bein/52/dd/lexpresso_xyp5eq14bu9m1o275gi8i1xlb.jpg?t=1074981292", 250, 166), null, true); + Podcast lexpresso = new Podcast(); + lexpresso.setTitle("L'Expresso") + .setUrl("http://www.beinsports.com/france/replay/lexpresso"); - String signature = beInSportUpdater.signatureOf(lexpresso); + String signature = beInSportsUpdater.signatureOf(lexpresso); logger.info("Signature 1 : {}", signature); - String signature2 = beInSportUpdater.signatureOf(lexpresso); + String signature2 = beInSportsUpdater.signatureOf(lexpresso); logger.info("Signature 2 : {}", signature2); - Assert.assertEquals(signature, signature2); + assertThat(signature).isEqualTo(signature2); } @Test public void updateFeedExpresso() { - Podcast lexpresso = new Podcast("L'expresso", "http://www.beinsports.fr/replay/category/3361/name/lexpresso", - "", "BeInSport", new Timestamp(System.currentTimeMillis()), null, new Cover("http://www.beinsports.fr/di/library/bein/52/dd/lexpresso_xyp5eq14bu9m1o275gi8i1xlb.jpg?t=1074981292", 250, 166), null, true); + Podcast lexpresso = new Podcast(); + lexpresso.setTitle("L'Expresso") + .setUrl("http://www.beinsports.com/france/replay/lexpresso"); + - beInSportUpdater.updateAndAddItems(lexpresso); - }*/ + /*"L'expresso", "http://www.beinsports.fr/replay/category/3361/name/lexpresso", + "", "BeInSport", new Timestamp(System.currentTimeMillis()), null, new Cover("http://www.beinsports.fr/di/library/bein/52/dd/lexpresso_xyp5eq14bu9m1o275gi8i1xlb.jpg?t=1074981292", 250, 166), null, true);*/ + + beInSportsUpdater.getItems(lexpresso); + } }