mirror of
https://github.com/sigmasternchen/grimoire-ssg
synced 2025-03-15 01:58:54 +00:00
feat: Add module for sitemaps
This commit is contained in:
parent
101d1f6a21
commit
fe7039750d
8 changed files with 107 additions and 7 deletions
|
@ -5,4 +5,5 @@ enabled_modules:
|
||||||
- tags
|
- tags
|
||||||
- markdown
|
- markdown
|
||||||
- templating
|
- templating
|
||||||
- test
|
- test
|
||||||
|
- sitemaps
|
|
@ -2,7 +2,7 @@ from grimoiressg.modules import available_modules
|
||||||
from grimoiressg.utils import logger
|
from grimoiressg.utils import logger
|
||||||
|
|
||||||
|
|
||||||
def test(data, context):
|
def test(data, context, config):
|
||||||
logger.info("This is test module.")
|
logger.info("This is test module.")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ from grimoiressg.utils import logger
|
||||||
def apply_modules(data, config, context):
|
def apply_modules(data, config, context):
|
||||||
for module in config.get("enabled_modules", []):
|
for module in config.get("enabled_modules", []):
|
||||||
logger.info("Applying module %s...", module)
|
logger.info("Applying module %s...", module)
|
||||||
available_modules[module](data, context)
|
available_modules[module](data, context, config)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
from grimoiressg.modules.markdown import compile_markdown
|
from grimoiressg.modules.markdown import compile_markdown
|
||||||
|
from grimoiressg.modules.sitemaps import generate_sitemaps
|
||||||
from grimoiressg.modules.tags import extract_tags
|
from grimoiressg.modules.tags import extract_tags
|
||||||
from grimoiressg.modules.templating import render_templates
|
from grimoiressg.modules.templating import render_templates
|
||||||
|
|
||||||
available_modules = {
|
available_modules = {
|
||||||
"tags": extract_tags,
|
"tags": extract_tags,
|
||||||
"markdown": compile_markdown,
|
"markdown": compile_markdown,
|
||||||
"templating": render_templates
|
"templating": render_templates,
|
||||||
|
"sitemaps": generate_sitemaps,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ import markdown
|
||||||
from grimoiressg.utils import logger
|
from grimoiressg.utils import logger
|
||||||
|
|
||||||
|
|
||||||
def compile_markdown(data, context):
|
def compile_markdown(data, context, config):
|
||||||
for entry in data:
|
for entry in data:
|
||||||
if "markdown" in entry:
|
if "markdown" in entry:
|
||||||
logger.debug("Compiling markdown for %s...", entry['relative_filename'])
|
logger.debug("Compiling markdown for %s...", entry['relative_filename'])
|
||||||
|
|
97
grimoiressg/modules/sitemaps.py
Normal file
97
grimoiressg/modules/sitemaps.py
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
import gzip
|
||||||
|
import os
|
||||||
|
from itertools import batched
|
||||||
|
from xml.etree import ElementTree as ET
|
||||||
|
|
||||||
|
from grimoiressg.utils import to_relative, logger
|
||||||
|
|
||||||
|
# Values accepted for the "index_file_strategy" sitemap option.
# "none": all selected entries are kept together in a single batch.
INDEX_FILE_STRATEGY_NONE = "none"
# "auto": selected entries are split into batches of 20 000; an index file is
# generated when that yields more than one batch.
INDEX_FILE_STRATEGY_AUTO = "auto"
|
||||||
|
|
||||||
|
|
||||||
|
def sitemaps_default_config():
    """Build the default settings for the sitemaps module.

    A new dictionary is created on every call, so the caller is free to
    overlay user-supplied values on top of it.

    Returns:
        A dict with the keys ``file_prefix``, ``loc_prefix``,
        ``index_file_strategy`` and ``compression``.
    """
    return dict(
        file_prefix="sitemap",
        loc_prefix="https://example.com/",
        index_file_strategy=INDEX_FILE_STRATEGY_AUTO,
        compression=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_files_to_map(data, sitemap_config):
|
||||||
|
content_for_sitemap = filter(
|
||||||
|
lambda item: item.get("output", False) and not item.get("skip_sitemap", False),
|
||||||
|
data
|
||||||
|
)
|
||||||
|
|
||||||
|
if sitemap_config["index_file_strategy"] == INDEX_FILE_STRATEGY_AUTO:
|
||||||
|
# maximum number of entries is 50 000, however there is also a 50 MiB size limit
|
||||||
|
# -> make 20 000 item batches - to be safe
|
||||||
|
return list(batched(content_for_sitemap, 20000))
|
||||||
|
else:
|
||||||
|
return [content_for_sitemap]
|
||||||
|
|
||||||
|
|
||||||
|
def get_sitemap_file_suffix(sitemap_config):
    """Return the file extension used for generated sitemap files.

    Args:
        sitemap_config: Sitemap settings; only ``"compression"`` is read.

    Returns:
        ``".xml.gz"`` when compression is enabled, ``".xml"`` otherwise.
    """
    # The payload is always XML; gzip only wraps it, so the compressed
    # variant must keep the ".xml" part (previously ".yml.gz" by mistake,
    # which also ended up in the URLs written to the index file).
    return ".xml.gz" if sitemap_config["compression"] else ".xml"
|
||||||
|
|
||||||
|
|
||||||
|
def save_sitemaps_file(xml_data, name, context, sitemap_config):
    """Serialize an XML tree and write it into the output directory.

    Args:
        xml_data: Root ``xml.etree.ElementTree.Element`` to serialize.
        name: File name without extension; the extension is obtained from
            ``get_sitemap_file_suffix``.
        context: Build context; only ``"output_dir"`` is read.
        sitemap_config: Sitemap settings; ``"compression"`` selects between a
            gzip-compressed and a plain file.
    """
    # Ask for the XML declaration explicitly and name the encoding "utf-8".
    # The former encoding='utf8' only produced a declaration as a side effect
    # and wrote the non-registered encoding name "utf8" into it.
    xml_bytes = ET.tostring(xml_data, encoding="utf-8", xml_declaration=True)

    filename = os.path.realpath(
        context["output_dir"] + "/" + name + get_sitemap_file_suffix(sitemap_config)
    )
    logger.debug("Writing sitemap %s", to_relative(filename))

    # gzip.open and open share the same binary-write interface.
    open_function = gzip.open if sitemap_config["compression"] else open
    with open_function(filename, "wb") as file:
        file.write(xml_bytes)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_index_file(context, sitemap_config, number_of_batches):
    """Write the sitemap index that references every numbered sitemap file.

    One ``<sitemap><loc>`` entry is produced for each number from 1 to
    ``number_of_batches``; its URL is ``loc_prefix`` + ``file_prefix`` +
    number + file suffix. The index itself is saved under ``file_prefix``.

    Args:
        context: Build context, passed through to ``save_sitemaps_file``.
        sitemap_config: Sitemap settings (``loc_prefix``, ``file_prefix`` and
            whatever the suffix/save helpers read).
        number_of_batches: How many numbered sitemap files to reference.
    """
    namespace_attributes = {
        "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
        "xsi:schemaLocation": "http://www.sitemaps.org/schemas/sitemap/0.9 "
                              "http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd",
        "xmlns": "http://www.sitemaps.org/schemas/sitemap/0.9",
    }
    index_root = ET.Element("sitemapindex", attrib=namespace_attributes)

    number = 1
    while number <= number_of_batches:
        location = ET.SubElement(ET.SubElement(index_root, "sitemap"), "loc")
        location.text = (
            sitemap_config["loc_prefix"]
            + sitemap_config["file_prefix"]
            + str(number)
            + get_sitemap_file_suffix(sitemap_config)
        )
        number += 1

    save_sitemaps_file(index_root, sitemap_config["file_prefix"], context, sitemap_config)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_sitemaps_file(batch, name, context, sitemap_config):
    """Write one sitemap (``<urlset>``) file for a batch of entries.

    Each entry contributes a ``<url><loc>`` element whose text is
    ``loc_prefix`` followed by the entry's ``"output"`` value.

    Args:
        batch: Iterable of entries; ``entry["output"]`` is read from each.
        name: File name without extension, passed to ``save_sitemaps_file``.
        context: Build context, passed through to ``save_sitemaps_file``.
        sitemap_config: Sitemap settings (``loc_prefix`` and whatever the
            save helper reads).
    """
    namespace_attributes = {
        "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
        "xsi:schemaLocation": "http://www.sitemaps.org/schemas/sitemap/0.9 "
                              "http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd",
        "xmlns": "http://www.sitemaps.org/schemas/sitemap/0.9",
    }
    urlset = ET.Element("urlset", attrib=namespace_attributes)

    for page in batch:
        location = ET.SubElement(ET.SubElement(urlset, "url"), "loc")
        location.text = sitemap_config["loc_prefix"] + page["output"]

    save_sitemaps_file(urlset, name, context, sitemap_config)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_sitemaps(data, context, config):
    """Module entry point: write the sitemap file(s) for the processed entries.

    The defaults from ``sitemaps_default_config`` are overlaid with the
    ``"sitemaps"`` section of ``config``. If the entries end up in more than
    one batch, an index file plus one numbered sitemap per batch (starting
    at 1) is written; otherwise a single sitemap named after ``file_prefix``
    is written.

    Args:
        data: Iterable of entries, passed to ``get_files_to_map``.
        context: Build context, passed through to the file writers.
        config: Global configuration mapping; only ``"sitemaps"`` is read.
    """
    sitemaps_config = sitemaps_default_config()
    # "or {}" also covers a bare "sitemaps:" key, which is parsed as None and
    # would make dict.update() raise TypeError.
    sitemaps_config.update(config.get("sitemaps") or {})

    batches = get_files_to_map(data, sitemaps_config)
    file_prefix = sitemaps_config["file_prefix"]

    if len(batches) > 1:
        logger.info("Entry limit exceeded; generating index file...")
        generate_index_file(context, sitemaps_config, len(batches))
        for number, batch in enumerate(batches, start=1):
            generate_sitemaps_file(batch, file_prefix + str(number), context, sitemaps_config)
    else:
        # With no eligible entries there may be no batch at all; still emit
        # an (empty) sitemap instead of failing with IndexError on batches[0].
        only_batch = batches[0] if batches else ()
        generate_sitemaps_file(only_batch, file_prefix, context, sitemaps_config)
|
|
@ -1,7 +1,7 @@
|
||||||
from grimoiressg.utils import logger
|
from grimoiressg.utils import logger
|
||||||
|
|
||||||
|
|
||||||
def extract_tags(data, context):
|
def extract_tags(data, context, config):
|
||||||
tags = {}
|
tags = {}
|
||||||
|
|
||||||
for entry in data:
|
for entry in data:
|
||||||
|
|
|
@ -9,7 +9,7 @@ jinja_env = Environment(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def render_templates(data, context):
|
def render_templates(data, context, config):
|
||||||
files_written = 0
|
files_written = 0
|
||||||
|
|
||||||
for entry in data:
|
for entry in data:
|
||||||
|
|
Loading…
Reference in a new issue