calibre-web: dtrpg-metadata Initial commit

This adds a new metadata provider to calibre-web for retrieving
publication data from drivethrurpg.com
This commit is contained in:
Ross Hendry 2022-09-14 15:46:57 +01:00
parent 413aa5129f
commit 4337200128
7 changed files with 168 additions and 79 deletions

View File

@ -4,8 +4,8 @@ on: [push, pull_request, workflow_dispatch]
env:
ENDPOINT: "linuxserver/mods" #don't modify
BASEIMAGE: "replace_baseimage" #replace
MODNAME: "replace_modname" #replace
BASEIMAGE: "calibre-web" #replace
MODNAME: "dtrpg-metadata" #replace
jobs:
build:

View File

@ -1,6 +1,5 @@
FROM scratch
LABEL maintainer="username"
LABEL maintainer="chooban"
# copy local files
COPY root/ /
COPY root/drivethrurpg.py /app/calibre-web/cps/metadata_provider/drivethrurpg.py

View File

@ -1,23 +0,0 @@
## Buildstage ##
# Two-stage example: the first stage assembles everything under /root-layer,
# the second stage copies that directory into an otherwise empty image.
FROM ghcr.io/linuxserver/baseimage-alpine:3.12 as buildstage
# Install curl, then use it to download a pinned rclone .deb into /root-layer.
RUN \
echo "**** install packages ****" && \
apk add --no-cache \
curl && \
echo "**** grab rclone ****" && \
mkdir -p /root-layer && \
curl -o \
/root-layer/rclone.deb -L \
"https://downloads.rclone.org/v1.47.0/rclone-v1.47.0-linux-amd64.deb"
# copy local files
# (the repository's root/ tree is merged into /root-layer alongside the download)
COPY root/ /root-layer/
## Single layer deployed image ##
# Starting from scratch means the final image contains only what is copied below.
FROM scratch
LABEL maintainer="username"
# Add files from buildstage
COPY --from=buildstage /root-layer/ /

View File

@ -1,25 +1,10 @@
# Rsync - Docker mod for openssh-server
# Calibre-web - DriveThruRPG Metadata Provider
This mod adds rsync to openssh-server, to be installed/updated during container start.
This adds a new metadata provider for `calibre-web` which will look for information on DriveThruRPG.com
In openssh-server docker arguments, set an environment variable `DOCKER_MODS=linuxserver/mods:openssh-server-rsync`
After installing, DriveThruRPG will be an option when pulling in metadata about a book.
If adding multiple mods, enter them in an array separated by `|`, such as `DOCKER_MODS=linuxserver/mods:openssh-server-rsync|linuxserver/mods:openssh-server-mod2`
In `calibre-web` docker arguments, set an environment variable `DOCKER_MODS=linuxserver/mods:calibre-web-dtrpg-metadata` to enable.
# Mod creation instructions
* Fork the repo, create a new branch based on the branch `template`.
* Edit the `Dockerfile` for the mod. `Dockerfile.complex` is only an example and included for reference; it should be deleted when done.
* Inspect the `root` folder contents. Edit, add and remove as necessary.
* Edit this readme with pertinent info, delete these instructions.
* Finally edit the `.github/workflows/BuildImage.yml`. Customize the build branch, and the vars for `BASEIMAGE` and `MODNAME`.
* Ask the team to create a new branch named `<baseimagename>-<modname>`. Baseimage should be the name of the image the mod will be applied to. The new branch will be based on the `template` branch.
* Submit PR against the branch created by the team.
## Tips and tricks
* To decrease startup times when multiple mods are used, we have consolidated `apt-get update` down to one file. As seen in the [nodejs mod](https://github.com/linuxserver/docker-mods/tree/code-server-nodejs/root/etc/cont-init.d)
* Some images have helpers built in; these images are currently:
* [Openvscode-server](https://github.com/linuxserver/docker-openvscode-server/pull/10/files)
* [Code-server](https://github.com/linuxserver/docker-code-server/pull/95)
If adding multiple mods, enter them in an array separated by `|`,
such as `DOCKER_MODS=linuxserver/mods:universal-calibre|linuxserver/mods:calibre-web-dtrpg-metadata`

158
root/drivethrurpg.py Normal file
View File

@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from typing import Dict, List, Optional
from urllib.parse import quote
from lxml import html
import requests
import re
from cps import logger
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
log = logger.create()
class DriveThruRpg(Metadata):
    """Metadata provider that looks up publications on DriveThruRPG.

    A search is a two step process: the site's autocomplete endpoint is
    queried for matching titles, then the product page of each of the first
    few hits is fetched and scraped for description, cover, authors, rule
    systems (used as tags) and publisher.
    """

    __name__ = "DriveThruRPG"
    __id__ = "drivethrurpg"
    DESCRIPTION = "DriveThru RPG"
    META_URL = "https://www.drivethrurpg.com/"
    BASE_URL = "https://www.drivethrurpg.com/includes/ajax/search_autocomplete_jquery.php?term="
    QUERY_PARAMS = "&json=true"
    HEADERS = {"User-Agent": "Not Evil Browser", "accept-encoding": "gzip"}

    # Seconds to wait for any single HTTP request. Without a timeout a stalled
    # server would block the metadata search indefinitely.
    REQUEST_TIMEOUT = 10
    # Each hit costs one extra request for its product page, so only the first
    # few hits are followed up.
    MAX_RESULTS = 5

    AUTHORS_XPATH = "//div[@class='widget-information-wrapper']//div[@class='widget-information-item-title' and contains(text(), 'Author(s)')]"
    RULE_SYSTEMS_XPATH = "//div[@class='widget-information-wrapper']//div[@class='widget-information-item-title' and contains(text(), 'Rule System(s)')]"
    PUBLISHER_XPATH = "//div[@class='widget-information-wrapper-2']//div[@class='widget-information-title' and contains(text(), 'Publisher')]"
    URL_PROP_XPATH = "//meta[@itemprop='url']/@content"
    DESCRIPTION_XPATH = "//div[contains(@class,'prod-content')]//text()"
    IMAGE_PROP_XPATH = "//meta[@itemprop='image']/@content"

    # Extracts the numeric product id from a product URL.
    PRODUCT_ID_RE = re.compile(r".*\/product\/(\d+)\/.*")
    # Text nodes that look like a plain name (word characters and whitespace).
    AUTHOR_RE = re.compile(r"^\w[\w\s]+$")

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search DriveThruRPG for ``query``.

        Args:
            query: Free text (normally the book title) to search for.
            generic_cover: Passed through to ``_parse_search_result``.
            locale: Passed through to ``_parse_search_result``.

        Returns:
            A list of records (empty when the provider is not active or
            nothing matched), or ``None`` when the search request failed or
            returned something other than a JSON list.
        """
        val = []
        # `active` and `get_title_tokens` are not defined in this class;
        # presumably they are inherited from `Metadata`.
        if not self.active:
            return val

        title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
        if title_tokens:
            tokens = [quote(t.encode("utf-8")) for t in title_tokens]
            query = "%20".join(tokens)

        try:
            response = requests.get(
                f"{DriveThruRpg.BASE_URL}{query}{DriveThruRpg.QUERY_PARAMS}",
                headers=DriveThruRpg.HEADERS,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            # A body that is not JSON raises a ValueError subclass.
            hits = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            log.warning(e)
            return None

        if not isinstance(hits, list):
            log.warning("Unexpected search response from DriveThruRPG")
            return None

        # Since we'll go on to do N further requests for more information,
        # we cut it off at the first few results here. Any sufficiently well
        # populated search by title should be enough.
        for hit in hits[: self.MAX_RESULTS]:
            # Skip malformed entries instead of failing the whole search.
            if not isinstance(hit, dict) or "name" not in hit:
                continue
            val.append(
                self._parse_search_result(
                    result=hit, generic_cover=generic_cover, locale=locale
                )
            )
        return val

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Build a record for one search hit, enriched from its product page.

        Args:
            result: One entry of the autocomplete response. ``"name"`` is
                required; ``"link"`` is the product page URL.
            generic_cover: Currently unused.
            locale: Currently unused.

        Returns:
            The record. When the product page cannot be fetched or parsed,
            the record carries only the name, link and source information.
        """
        # Local import: ParserError lives in lxml.etree, which the module does
        # not import at the top level.
        from lxml import etree

        match = MetaRecord(
            id=result["name"],
            title=result["name"],
            authors=[],
            url=result.get("link", ""),
            source=MetaSourceInfo(
                id=self.__id__,
                description=DriveThruRpg.DESCRIPTION,
                link=DriveThruRpg.META_URL,
            ),
        )

        link = result.get("link")
        if not link:
            log.warning("DriveThruRPG search result has no link")
            return match

        try:
            details_result = requests.get(
                link,
                headers=DriveThruRpg.HEADERS,
                timeout=self.REQUEST_TIMEOUT,
            )
            details_result.raise_for_status()
            # An empty document makes lxml raise ParserError.
            data = html.fromstring(details_result.content)
        except (requests.exceptions.RequestException, etree.ParserError) as e:
            log.warning(e)
            return match

        # Use the big text field as description as the meta tag is very short
        match.description = "".join(data.xpath(self.DESCRIPTION_XPATH)).strip()

        product_url = data.xpath(self.URL_PROP_XPATH)
        if product_url:
            match.url = product_url[0]
            # We can get a better ID from the URL
            product_ids = self.PRODUCT_ID_RE.findall(match.url)
            if product_ids:
                match.id = product_ids[0]

        image_url = data.xpath(self.IMAGE_PROP_XPATH)
        if image_url:
            match.cover = image_url[0]

        # Find authors: just bring in elements that look like they might be
        # authors.
        for title_div in data.xpath(self.AUTHORS_XPATH):
            match.authors = [
                text
                for text in self._sibling_texts(title_div)
                if self.AUTHOR_RE.match(text)
            ]

        # Use rule systems as tags
        match.tags = ["RPG"]
        for title_div in data.xpath(self.RULE_SYSTEMS_XPATH):
            match.tags.extend(
                text for text in self._sibling_texts(title_div) if text.strip()
            )

        for title_div in data.xpath(self.PUBLISHER_XPATH):
            container = title_div.getnext()
            if container is None:
                continue
            # Sometimes we get a link, other times it's text in a different
            # element. If neither is present the publisher is left unset.
            nodes = container.xpath(".//a") or container.xpath(
                ".//div[@class='widget-information-item-title']"
            )
            if nodes:
                match.publisher = nodes[0].text_content().strip()

        match.identifiers = {"drivethrurpg": match.id}
        return match

    @staticmethod
    def _sibling_texts(title_div) -> List[str]:
        """Return all text nodes of the element following ``title_div``."""
        sibling = title_div.getnext()
        # getnext() returns None when there is no following sibling.
        if sibling is None:
            return []
        return sibling.xpath(".//text()")

View File

@ -1,27 +0,0 @@
#!/usr/bin/with-contenv bash
# Container init script: installs sshuttle and its prerequisites with
# whichever package manager is present (apt or apk), then fixes ownership and
# permissions under /root.
# Determine if setup is needed
# NOTE(review): `python***` looks like a glob standing in for the Python
# version, and `-f` tests for a regular file - confirm these checks ever
# report sshuttle as already installed.
if [ ! -f /usr/local/lib/python***/dist-packages/sshuttle ] && \
[ -f /usr/bin/apt ]; then
## Ubuntu
apt-get update
apt-get install --no-install-recommends -y \
iptables \
openssh-client \
python3 \
python3-pip
pip3 install sshuttle
fi
# Same check and install for images that ship apk instead of apt.
if [ ! -f /usr/lib/python***/site-packages/sshuttle ] && \
[ -f /sbin/apk ]; then
# Alpine
apk add --no-cache \
iptables \
openssh \
py3-pip \
python3
pip3 install sshuttle
fi
# Runs on every start, regardless of which branch above was taken.
chown -R root:root /root
# NOTE(review): recursive 600 also clears the execute bit on the .ssh
# directory itself - confirm this is intended.
chmod -R 600 /root/.ssh

View File

@ -1,3 +0,0 @@
#!/usr/bin/with-contenv bash
# Service script: launches sshuttle against root@${HOST}:${PORT}.
# HOST and PORT are not set in this script; presumably they come from the
# container environment.
# Presumably `0/0` routes all traffic and `-x` excludes 172.17.0.0/16 -
# confirm against the sshuttle manual.
sshuttle --dns --remote root@${HOST}:${PORT} 0/0 -x 172.17.0.0/16