From 4337200128bc23b2eeaaf8a6d8faaa341f4cb6a6 Mon Sep 17 00:00:00 2001 From: Ross Hendry Date: Wed, 14 Sep 2022 15:46:57 +0100 Subject: [PATCH] calibre-web: dtrpg-metadata Initial commit This adds a new metadata provider to calibre-web for retrieving publication data from drivethrurpg.com --- .github/workflows/BuildImage.yml | 4 +- Dockerfile | 5 +- Dockerfile.complex | 23 ----- README.md | 27 ++--- root/drivethrurpg.py | 158 +++++++++++++++++++++++++++++ root/etc/cont-init.d/98-vpn-config | 27 ----- root/etc/services.d/sshvpn/run | 3 - 7 files changed, 168 insertions(+), 79 deletions(-) delete mode 100644 Dockerfile.complex create mode 100644 root/drivethrurpg.py delete mode 100644 root/etc/cont-init.d/98-vpn-config delete mode 100644 root/etc/services.d/sshvpn/run diff --git a/.github/workflows/BuildImage.yml b/.github/workflows/BuildImage.yml index 518b0d8..53c1eff 100644 --- a/.github/workflows/BuildImage.yml +++ b/.github/workflows/BuildImage.yml @@ -4,8 +4,8 @@ on: [push, pull_request, workflow_dispatch] env: ENDPOINT: "linuxserver/mods" #don't modify - BASEIMAGE: "replace_baseimage" #replace - MODNAME: "replace_modname" #replace + BASEIMAGE: "calibre-web" #replace + MODNAME: "dtrpg-metadata" #replace jobs: build: diff --git a/Dockerfile b/Dockerfile index 4ece5e8..6da9698 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,5 @@ FROM scratch -LABEL maintainer="username" +LABEL maintainer="chooban" -# copy local files -COPY root/ / +COPY root/drivethrurpg.py /app/calibre-web/cps/metadata_provider/drivethrurpg.py diff --git a/Dockerfile.complex b/Dockerfile.complex deleted file mode 100644 index db4598e..0000000 --- a/Dockerfile.complex +++ /dev/null @@ -1,23 +0,0 @@ -## Buildstage ## -FROM ghcr.io/linuxserver/baseimage-alpine:3.12 as buildstage - -RUN \ - echo "**** install packages ****" && \ - apk add --no-cache \ - curl && \ - echo "**** grab rclone ****" && \ - mkdir -p /root-layer && \ - curl -o \ - /root-layer/rclone.deb -L \ - 
"https://downloads.rclone.org/v1.47.0/rclone-v1.47.0-linux-amd64.deb" - -# copy local files -COPY root/ /root-layer/ - -## Single layer deployed image ## -FROM scratch - -LABEL maintainer="username" - -# Add files from buildstage -COPY --from=buildstage /root-layer/ / diff --git a/README.md b/README.md index 761c799..a74d349 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,10 @@ -# Rsync - Docker mod for openssh-server +# Calibre-web - DriveThruRPG Metadata Provider -This mod adds rsync to openssh-server, to be installed/updated during container start. +This adds a new metadata provider for `calibre-web` which will look for information on DriveThruRPG.com -In openssh-server docker arguments, set an environment variable `DOCKER_MODS=linuxserver/mods:openssh-server-rsync` +After installing, DriveThruRPG will be an option when pulling in metadata about a book. -If adding multiple mods, enter them in an array separated by `|`, such as `DOCKER_MODS=linuxserver/mods:openssh-server-rsync|linuxserver/mods:openssh-server-mod2` +In `calibre-web` docker arguments, set an environment variable `DOCKER_MODS=linuxserver/mods:calibre-web-dtrpg-metadata` to enable. -# Mod creation instructions - -* Fork the repo, create a new branch based on the branch `template`. -* Edit the `Dockerfile` for the mod. `Dockerfile.complex` is only an example and included for reference; it should be deleted when done. -* Inspect the `root` folder contents. Edit, add and remove as necessary. -* Edit this readme with pertinent info, delete these instructions. -* Finally edit the `.github/workflows/BuildImage.yml`. Customize the build branch, and the vars for `BASEIMAGE` and `MODNAME`. -* Ask the team to create a new branch named `-`. Baseimage should be the name of the image the mod will be applied to. The new branch will be based on the `template` branch. -* Submit PR against the branch created by the team. 
- - -## Tips and tricks - -* To decrease startup times when multiple mods are used, we have consolidated `apt-get update` down to one file. As seen in the [nodejs mod](https://github.com/linuxserver/docker-mods/tree/code-server-nodejs/root/etc/cont-init.d) -* Some images has helpers built in, these images are currently: - * [Openvscode-server](https://github.com/linuxserver/docker-openvscode-server/pull/10/files) - * [Code-server](https://github.com/linuxserver/docker-code-server/pull/95) +If adding multiple mods, enter them in an array separated by `|`, +such as `DOCKER_MODS=linuxserver/mods:universal-calibre|linuxserver/mods:calibre-web-dtrpg-metadata` diff --git a/root/drivethrurpg.py b/root/drivethrurpg.py new file mode 100644 index 0000000..4bb7cea --- /dev/null +++ b/root/drivethrurpg.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+
+from typing import Dict, List, Optional
+from urllib.parse import quote
+from lxml import html
+import requests
+import re
+
+from cps import logger
+from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
+
+log = logger.create()
+
+
+class DriveThruRpg(Metadata):
+    """Metadata provider that looks publications up on DriveThruRPG."""
+
+    __name__ = "DriveThruRPG"
+    __id__ = "drivethrurpg"
+    DESCRIPTION = "DriveThru RPG"
+    META_URL = "https://www.drivethrurpg.com/"
+    BASE_URL = "https://www.drivethrurpg.com/includes/ajax/search_autocomplete_jquery.php?term="
+    QUERY_PARAMS = "&json=true"
+    HEADERS = {"User-Agent": "Not Evil Browser", "accept-encoding": "gzip"}
+    # requests waits forever by default, so every call gets a timeout (seconds).
+    TIMEOUT = 10
+    # Every search hit costs one more request, so only the first few are used.
+    MAX_RESULTS = 5
+
+    AUTHORS_XPATH = "//div[@class='widget-information-wrapper']//div[@class='widget-information-item-title' and contains(text(), 'Author(s)')]"
+    RULE_SYSTEMS_XPATH = "//div[@class='widget-information-wrapper']//div[@class='widget-information-item-title' and contains(text(), 'Rule System(s)')]"
+    PUBLISHER_XPATH = "//div[@class='widget-information-wrapper-2']//div[@class='widget-information-title' and contains(text(), 'Publisher')]"
+    URL_PROP_XPATH = "//meta[@itemprop='url']/@content"
+    DESCRIPTION_XPATH = "//div[contains(@class,'prod-content')]//text()"
+    IMAGE_PROP_XPATH = "//meta[@itemprop='image']/@content"
+
+    def search(
+        self, query: str, generic_cover: str = "", locale: str = "en"
+    ) -> Optional[List[MetaRecord]]:
+        """Search DriveThruRPG by title.
+
+        Returns one record per usable hit (at most ``MAX_RESULTS``), an empty
+        list when the provider is inactive, or ``None`` when the search fails.
+        """
+        if not self.active:
+            return []
+        title_tokens = self.get_title_tokens(query, strip_joiners=False)
+        # Fall back to the raw query so the search term is always URL-quoted.
+        tokens = [quote(t.encode("utf-8")) for t in title_tokens] or [quote(query)]
+        url = f"{self.BASE_URL}{'%20'.join(tokens)}{self.QUERY_PARAMS}"
+        try:
+            response = requests.get(url, headers=self.HEADERS, timeout=self.TIMEOUT)
+            response.raise_for_status()
+            hits = response.json()
+        except Exception as e:
+            # Best effort: a failed lookup must not break the metadata search.
+            log.warning(e)
+            return None
+        if not isinstance(hits, list):
+            log.warning("Unexpected DriveThruRPG search response")
+            return None
+        return [
+            self._parse_search_result(
+                result=hit, generic_cover=generic_cover, locale=locale
+            )
+            for hit in hits[: self.MAX_RESULTS]
+            if isinstance(hit, dict)
+        ]
+
+    @staticmethod
+    def _value_texts(label) -> List[str]:
+        """Return the text nodes of the element that follows *label*, if any."""
+        value = label.getnext()
+        return [] if value is None else value.xpath(".//text()")
+
+    def _parse_search_result(
+        self, result: Dict, generic_cover: str, locale: str
+    ) -> MetaRecord:
+        """Build a record from one search hit, enriched from its product page.
+
+        Only the search data is returned when the page cannot be fetched or parsed.
+        """
+        name = result.get("name", "")
+        link = result.get("link", "")
+        match = MetaRecord(
+            id=name,
+            title=name,
+            authors=[],
+            url=link,
+            source=MetaSourceInfo(
+                id=self.__id__,
+                description=self.DESCRIPTION,
+                link=self.META_URL,
+            ),
+        )
+        if not link:
+            return match
+        try:
+            page = requests.get(link, headers=self.HEADERS, timeout=self.TIMEOUT)
+            page.raise_for_status()
+            data = html.fromstring(page.content)
+        except Exception as e:
+            log.warning(e)
+            return match
+
+        # Use the big text field as description as the meta tag is very short
+        match.description = "".join(data.xpath(self.DESCRIPTION_XPATH)).strip()
+
+        product_url = data.xpath(self.URL_PROP_XPATH)
+        if product_url:
+            match.url = product_url[0]
+
+        # We can get a better ID from the URL
+        matches = re.findall(r".*\/product\/(\d+)\/.*", match.url)
+        if matches:
+            match.id = matches[0]
+
+        image_url = data.xpath(self.IMAGE_PROP_XPATH)
+        if image_url:
+            match.cover = image_url[0]
+
+        # Just bring in elements that look like they might be authors.
+        for label in data.xpath(self.AUTHORS_XPATH):
+            texts = self._value_texts(label)
+            match.authors = [t.strip() for t in texts if re.match(r"^\w[\w\s]+$", t)]
+
+        # Use rule systems as tags
+        match.tags = ["RPG"]
+        for label in data.xpath(self.RULE_SYSTEMS_XPATH):
+            match.tags.extend(t.strip() for t in self._value_texts(label) if t.strip())
+
+        for label in data.xpath(self.PUBLISHER_XPATH):
+            value = label.getnext()
+            if value is None:
+                continue
+            # Sometimes we get a link, other times it's text in a different element.
+            found = value.xpath(".//a") or value.xpath(
+                ".//div[@class='widget-information-item-title']"
+            )
+            if found:
+                match.publisher = found[0].text_content().strip()
+
+        match.identifiers = {"drivethrurpg": match.id}
+
+        return match
diff --git a/root/etc/cont-init.d/98-vpn-config b/root/etc/cont-init.d/98-vpn-config deleted file mode 100644 index a5f9127..0000000 --- a/root/etc/cont-init.d/98-vpn-config +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/with-contenv bash - -# Determine if setup is needed -if [ ! -f /usr/local/lib/python***/dist-packages/sshuttle ] && \ -[ -f /usr/bin/apt ]; then - ## Ubuntu - apt-get update - apt-get install --no-install-recommends -y \ - iptables \ - openssh-client \ - python3 \ - python3-pip - pip3 install sshuttle -fi -if [ ! 
-f /usr/lib/python***/site-packages/sshuttle ] && \ -[ -f /sbin/apk ]; then - # Alpine - apk add --no-cache \ - iptables \ - openssh \ - py3-pip \ - python3 - pip3 install sshuttle -fi - -chown -R root:root /root -chmod -R 600 /root/.ssh diff --git a/root/etc/services.d/sshvpn/run b/root/etc/services.d/sshvpn/run deleted file mode 100644 index 7d49e79..0000000 --- a/root/etc/services.d/sshvpn/run +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/with-contenv bash - -sshuttle --dns --remote root@${HOST}:${PORT} 0/0 -x 172.17.0.0/16