summaryrefslogtreecommitdiff
path: root/lib/Multi
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2019-08-25 13:25:50 +0200
committerYorhel <git@yorhel.nl>2019-08-25 14:16:52 +0200
commit1befcd9ef905f114c8e9b8408292c110d58f992e (patch)
tree4d089e4e00feaf2c61a256c28df525436499a51f /lib/Multi
parent8f836645ba4e5353dae466a0e184bef46cd95765 (diff)
Add DLsite price & shop info crawler
Diffstat (limited to 'lib/Multi')
-rw-r--r--lib/Multi/DLsite.pm95
1 files changed, 95 insertions, 0 deletions
diff --git a/lib/Multi/DLsite.pm b/lib/Multi/DLsite.pm
new file mode 100644
index 00000000..c6aae8f7
--- /dev/null
+++ b/lib/Multi/DLsite.pm
@@ -0,0 +1,95 @@
+package Multi::DLsite;
+
+use strict;
+use warnings;
+use utf8;
+use Encode 'decode_utf8';
+use Multi::Core;
+use AnyEvent::HTTP;
+
+
+my %C = (
+ url => 'https://www.dlsite.com/%s/work/=/product_id/%s.html',
+ clean_timeout => 48*3600,
+ check_timeout => 10*60,
+);
+
+
+sub run {
+ shift;
+ $C{ua} = "VNDB.org Affiliate Crawler (Multi v$VNDB::S{version}; contact\@vndb.org)";
+ %C = (%C, @_);
+
+ push_watcher schedule 0, $C{clean_timeout}, sub {
+ pg_cmd q{DELETE FROM shop_dlsite WHERE id NOT IN(
+ SELECT l_dlsite FROM releases WHERE NOT hidden
+ UNION ALL
+ SELECT l_dlsiteen FROM releases WHERE NOT hidden)};
+ };
+ push_watcher schedule 0, $C{check_timeout}, sub {
+ pg_cmd q{
+ INSERT INTO shop_dlsite (id)
+ SELECT DISTINCT l_dlsite
+ FROM releases
+ WHERE NOT hidden AND l_dlsite <> ''
+ AND NOT EXISTS(SELECT 1 FROM shop_dlsite WHERE id = l_dlsite)
+ }, [], sub {
+ pg_cmd q{
+ INSERT INTO shop_dlsite (id)
+ SELECT DISTINCT l_dlsiteen
+ FROM releases
+ WHERE NOT hidden AND l_dlsiteen <> ''
+ AND NOT EXISTS(SELECT 1 FROM shop_dlsite WHERE id = l_dlsiteen)
+ }, [], \&sync
+ }
+ }
+}
+
+
+sub data {
+ my($shop, $time, $id, $body, $hdr) = @_;
+ my $prefix = sprintf '[%.1fs] %s', $time, $id;
+ #use Data::Dumper 'Dumper'; AE::log warn => Dumper $hdr, $body; exit;
+ return AE::log warn => "$prefix ERROR: $hdr->{Status} $hdr->{Reason}" if $hdr->{Status} !~ /^2/ && $hdr->{Status} ne '404';
+
+ $body = decode_utf8($body);
+ my $found = $hdr->{Status} ne '404' && $body =~ /"id":"\Q$id\E",/;
+
+ my $price =
+ $body =~ m{<div class="work_buy_content"><span class="price">([0-9,]+)<i>円</i></span></div>} ? sprintf('JP¥ %d', $1 =~ s/,//gr) :
+ $body =~ m{<i class="work_jpy">([0-9,]+) JPY</i></span>} ? sprintf('JP¥ %d', $1 =~ s/,//gr) : '';
+
+ $shop = $body =~ /,"category":"([^"]+)"/ ? $1 : '';
+ $shop = 'ecchi-eng' if $shop eq 'ecchieng'; # Both work, but DLsite seems to prefer a dash.
+
+ return AE::log warn => "$prefix Product found, but no price ($price) or shop ($shop)" if $found && (!$price || !$shop);
+
+ # We have a price? Update database.
+ if($price && $shop) {
+ pg_cmd q{UPDATE shop_dlsite SET found = TRUE, shop = $2, price = $3, lastfetch = NOW() WHERE id = $1}, [ $id, $shop, $price ];
+ AE::log debug => "$prefix for $price at /$shop/";
+
+ # Nothing? Update DB
+ } else {
+ pg_cmd q{UPDATE shop_dlsite SET found = FALSE, lastfetch = NOW() WHERE id = $1}, [ $id ];
+ AE::log info => "$prefix not found.";
+ }
+}
+
+
+sub fetch {
+ my($shop, $id) = @_;
+ my $ts = AE::now;
+ my $url = sprintf $C{url}, $shop, $id;
+ http_get $url, headers => {'User-Agent' => $C{ua} }, timeout => 60,
+ sub { data($shop, AE::now-$ts, $id, @_) };
+}
+
+
+sub sync {
+ pg_cmd 'SELECT id, shop FROM shop_dlsite ORDER BY lastfetch ASC NULLS FIRST LIMIT 1', [], sub {
+ my($res, $time) = @_;
+ return if pg_expect $res, 1 or !$res->nRows;
+ fetch 'home', $res->value(0,0);
+ };
+}