Using w3m for HTML->TXT doc conversion

h/t Shin-ichi for the tip
This commit is contained in:
Bill Kendrick 2023-07-16 10:48:00 -07:00
parent 8800a58533
commit 82ee0dda75
4 changed files with 32 additions and 45 deletions

View file

@ -6,7 +6,7 @@ Copyright (c) 2002-2023
Various contributors (see below, and AUTHORS.txt) Various contributors (see below, and AUTHORS.txt)
https://tuxpaint.org/ https://tuxpaint.org/
2023.July.13 (0.9.31) 2023.July.15 (0.9.31)
* New Magic Tools: * New Magic Tools:
---------------- ----------------
* Loops - Draw loop-the-loops. * Loops - Draw loop-the-loops.
@ -105,6 +105,12 @@ https://tuxpaint.org/
and keyboard accessibility mode). and keyboard accessibility mode).
Bill Kendrick <bill@newbreedsoftware.com> Bill Kendrick <bill@newbreedsoftware.com>
* Using `w3m` (rather than `links`) to generate plain text
alternatives to HTML documentation. (It handles word-wrapping
Japanese text properly.)
h/t TOYAMA Shin-ichi
Bill Kendrick <bill@newbreedsoftware.com>
* Bug Fixes: * Bug Fixes:
---------- ----------
* In some window size / button size combinations, Eraser * In some window size / button size combinations, Eraser

View file

@ -7,15 +7,17 @@
# Bill Kendrick # Bill Kendrick
# bill@newbreedsoftware.com # bill@newbreedsoftware.com
# #
# Sept. 4, 2005 - June 29, 2023 # Sept. 4, 2005 - July 16, 2023
# FIXME: Japanese does not wordwrap in many cases, leading to very long # FIXME: Japanese does not wordwrap in many cases, leading to very long
# lines in the TXT output. Post-processing with `fmt` doesn't look like # lines in the TXT output. Post-processing with `fmt` doesn't look like
# it would help, because it doesn't know how to wrap Japanese, either. # it would help, because it doesn't know how to wrap Japanese, either.
# -bjk 2023.05.02 # -bjk 2023.05.02
# HTML2TXT_OPTIONS:=-dump -codepage utf8 -width 80
# HTML2TXT:=links $(HTML2TXT_OPTIONS)
LINKS_OPTIONS:=-dump -codepage utf8 -width 80 HTML2TXT_OPTIONS:=-dump -cols 80 -no-graph -o pseudo_inlines=f -o display_image=f -T text/html
LINKS:=links $(LINKS_OPTIONS) HTML2TXT:=./w3m.sh $(HTML2TXT_OPTIONS)
EN_HTMLFILES:=$(wildcard en/html/*.html) EN_HTMLFILES:=$(wildcard en/html/*.html)
EN_TEXTFILES:=$(patsubst en/html/%.html,en/%.txt,$(EN_HTMLFILES)) EN_TEXTFILES:=$(patsubst en/html/%.html,en/%.txt,$(EN_HTMLFILES))
@ -80,35 +82,34 @@ clean:
$(ZH_TW_TEXTFILES) $(ZH_TW_TEXTFILES)
$(EN_TEXTFILES): en/%.txt: en/html/%.html $(EN_TEXTFILES): en/%.txt: en/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(ES_TEXTFILES): es_ES.UTF-8/%.txt: es_ES.UTF-8/html/%.html $(ES_TEXTFILES): es_ES.UTF-8/%.txt: es_ES.UTF-8/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(FR_TEXTFILES): fr_FR.UTF-8/%.txt: fr_FR.UTF-8/html/%.html $(FR_TEXTFILES): fr_FR.UTF-8/%.txt: fr_FR.UTF-8/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(GL_TEXTFILES): gl_ES.UTF-8/%.txt: gl_ES.UTF-8/html/%.html $(GL_TEXTFILES): gl_ES.UTF-8/%.txt: gl_ES.UTF-8/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(IS_TEXTFILES): is_IS.UTF-8/%.txt: is_IS.UTF-8/html/%.html $(IS_TEXTFILES): is_IS.UTF-8/%.txt: is_IS.UTF-8/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(IT_TEXTFILES): it/%.txt: it/html/%.html $(IT_TEXTFILES): it/%.txt: it/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(JA_TEXTFILES): ja_JP.UTF-8/%.txt: ja_JP.UTF-8/html/%.html $(JA_TEXTFILES): ja_JP.UTF-8/%.txt: ja_JP.UTF-8/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(NL_TEXTFILES): nl/%.txt: nl/html/%.html $(NL_TEXTFILES): nl/%.txt: nl/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(RU_TEXTFILES): ru/%.txt: ru/html/%.html $(RU_TEXTFILES): ru/%.txt: ru/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(ZH_CN_TEXTFILES): zh_cn/%.txt: zh_cn/html/%.html $(ZH_CN_TEXTFILES): zh_cn/%.txt: zh_cn/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@
$(ZH_TW_TEXTFILES): zh_tw/%.txt: zh_tw/html/%.html $(ZH_TW_TEXTFILES): zh_tw/%.txt: zh_tw/html/%.html
$(LINKS) $< > $@ $(HTML2TXT) $< > $@

View file

@ -1,27 +0,0 @@
#!/usr/bin/env python3

# FIXME: Under construction!
# Experimental script to try and wrap
# Japanese plaintext docs, since `links` does not
# currently do this properly. -bjk 2023.07.15
#
# Reads ja_JP.UTF-8/README.txt and prints it to standard output with each
# input line re-wrapped by cjkwrap. Every output line produced from an input
# line is prefixed with as many spaces as that input line had leading
# whitespace characters.

import cjkwrap

# The input is UTF-8 Japanese text; name the encoding explicitly instead of
# relying on whatever the current locale's default codec happens to be.
with open('ja_JP.UTF-8/README.txt', encoding='utf-8') as f:
    # Iterating the file yields one line at a time and stops at end-of-file.
    for line in f:
        stripped = line.lstrip()

        if not stripped:
            # Whitespace-only line (lstrip() also eats the newline).
            # NOTE(review): cjkwrap.wrap() is expected to return no lines for
            # empty text (as textwrap does), which would drop the paragraph
            # break entirely -- emit the blank line ourselves.
            print()
            continue

        # Number of leading whitespace characters removed by lstrip().
        indent = len(line) - len(stripped)

        # Wrap to 78 columns minus the indentation that is re-added below.
        for l in cjkwrap.wrap(stripped, 78 - indent):
            print(" " * indent, end="")
            print(l)
# The `with` statement closes the file; no explicit close() is needed.

7
docs/w3m.sh Executable file
View file

@ -0,0 +1,7 @@
#!/bin/bash

# Wrapper around w3m used for HTML->TXT conversion.
#
# Usage: w3m.sh [w3m options...] INFILE
#
# The LAST argument is the HTML input file; every argument before it is
# handed to w3m unchanged. The input is piped through sed, which inserts
# "<br/><br/>" in front of every "</dd>", and w3m reads the result on
# standard input.

# Without arguments there is no input file to read, so fail early with a
# usage message instead of running sed on something unintended.
if [ "$#" -lt 1 ]; then
  echo "Usage: $0 [w3m options...] infile.html" >&2
  exit 1
fi

# Make a sed failure (e.g. missing/unreadable input) show up in the script's
# exit status rather than being hidden behind w3m's.
set -o pipefail

# Last positional parameter: the input file.
infile="${@: -1}"

# All parameters except the last, kept as an array so each option reaches
# w3m as exactly one word (no re-splitting or glob expansion).
args=("${@:1:$#-1}")

sed -e "s/<\\/dd>/<br\\/><br\\/><\\/dd>/g" "$infile" | w3m "${args[@]}"