Using w3m for HTML->TXT doc conversion

h/t Shin-ichi for the tip
This commit is contained in:
Bill Kendrick 2023-07-16 10:48:00 -07:00
parent 8800a58533
commit 82ee0dda75
4 changed files with 32 additions and 45 deletions

View file

@ -1,21 +1,23 @@
# Makefile for Tux Paint docs
#
#
# Uses "w3m" (via the w3m.sh wrapper script) to convert docs from HTML to
# plain text. (Normally only run by the developers after updating the HTML,
# prior to release.)
#
# Bill Kendrick
# bill@newbreedsoftware.com
#
# Sept. 4, 2005 - June 29, 2023
#
# Sept. 4, 2005 - July 16, 2023
# FIXME: Japanese does not wordwrap in many cases, leading to very long
# lines in the TXT output. Post-processing with `fmt` doesn't look like
# it would help, because it doesn't know how to wrap Japanese, either.
# -bjk 2023.05.02
# Earlier "links"-based definition of the HTML-to-text converter, left
# commented out for reference:
# HTML2TXT_OPTIONS:=-dump -codepage utf8 -width 80
# HTML2TXT:=links $(HTML2TXT_OPTIONS)
# "links" command line: dump mode, UTF-8 codepage, 80-column width.
LINKS_OPTIONS:=-dump -codepage utf8 -width 80
LINKS:=links $(LINKS_OPTIONS)
# HTML-to-text converter command used by the conversion rules. It runs the
# ./w3m.sh script (not shown here; presumably a thin wrapper around w3m --
# TODO confirm) with w3m-style options: dump the rendered page, 80 columns,
# no line-drawing graphics, no pseudo-inline or image placeholders, and
# treat the input as text/html. NOTE(review): option meanings are taken from
# the w3m manual; verify against what w3m.sh actually passes through.
HTML2TXT_OPTIONS:=-dump -cols 80 -no-graph -o pseudo_inlines=f -o display_image=f -T text/html
HTML2TXT:=./w3m.sh $(HTML2TXT_OPTIONS)
# English docs: every HTML file under en/html/, and the matching en/<name>.txt
# output for each one.
EN_HTMLFILES:=$(wildcard en/html/*.html)
EN_TEXTFILES:=$(patsubst en/html/%.html,en/%.txt,$(EN_HTMLFILES))
@ -80,35 +82,34 @@ clean:
$(ZH_TW_TEXTFILES)
# Static pattern rules, one per locale: each plain-text doc is regenerated
# from the HTML file with the same stem in that locale's html/ subdirectory.
#
# Every recipe runs $(HTML2TXT) exactly once, with the HTML source ($<) as its
# argument and its standard output redirected into the target ($@). The former
# `$(LINKS) $< > $@` step is gone: it wrote to the same target and was
# immediately overwritten by the $(HTML2TXT) run, so it only cost time and
# kept `links` as a build requirement.
#
# Recipe lines must begin with a hard TAB.

$(EN_TEXTFILES): en/%.txt: en/html/%.html
	$(HTML2TXT) $< > $@

$(ES_TEXTFILES): es_ES.UTF-8/%.txt: es_ES.UTF-8/html/%.html
	$(HTML2TXT) $< > $@

$(FR_TEXTFILES): fr_FR.UTF-8/%.txt: fr_FR.UTF-8/html/%.html
	$(HTML2TXT) $< > $@

$(GL_TEXTFILES): gl_ES.UTF-8/%.txt: gl_ES.UTF-8/html/%.html
	$(HTML2TXT) $< > $@

$(IS_TEXTFILES): is_IS.UTF-8/%.txt: is_IS.UTF-8/html/%.html
	$(HTML2TXT) $< > $@

$(IT_TEXTFILES): it/%.txt: it/html/%.html
	$(HTML2TXT) $< > $@

$(JA_TEXTFILES): ja_JP.UTF-8/%.txt: ja_JP.UTF-8/html/%.html
	$(HTML2TXT) $< > $@

$(NL_TEXTFILES): nl/%.txt: nl/html/%.html
	$(HTML2TXT) $< > $@

$(RU_TEXTFILES): ru/%.txt: ru/html/%.html
	$(HTML2TXT) $< > $@

$(ZH_CN_TEXTFILES): zh_cn/%.txt: zh_cn/html/%.html
	$(HTML2TXT) $< > $@

$(ZH_TW_TEXTFILES): zh_tw/%.txt: zh_tw/html/%.html
	$(HTML2TXT) $< > $@