pandoc · Delanii · Oct 13, 2020 · Oct 13, 2020 · Oct 13, 2020 · Nov 6, 2020
@@ -0,0 +1,27 @@
+# Non-breakable space filter
+
+This filter replaces regular spaces with non-breakable spaces according to 
+predefined conditions. Currently, this filter replaces regular spaces with
+non-breakable ones after one-letter words (prepositions and conjunctions):
+'a', 'i', 'k', 'o', 's', 'u', 'v', 'z', and their uppercase variants. It also
+inserts non-breakable spaces in front of en-dashes and in front of numbers.
+Some extra effort is taken in detecting these patterns in *not-fully* parsed
+strings (for example, if this filter is used after some macro replacing 
+filter).
+
+In this regard, this filter works similarly to the TeX `vlna` preprocessor
+or the LuaTeX `luavlna` package.
+
+The word list is selected by the document's `lang` metadata value: the Czech
+typography rules are used for `lang: cs`, the English ones otherwise. Both can
+be changed easily by user customization in the filter file `pandocVlna.lua`
+by changing the contents of the prefix tables or of the `dashes` table.
+
+Currently supported formats are:
+
+* LaTeX and ConTeXt
+* Open Office Document
+* MS Word
+* HTML
+
+**NOTE**: Using this filter increases strain on line-breaking patterns. Whenever 
+possible, consider allowing hyphenation.
@@ -0,0 +1,12 @@
+<h1 id="tests">Tests</h1>
+<h2 id="basic-test">Basic test</h2>
+<p>a&nbsp;test i&nbsp;test k&nbsp;test o&nbsp;test s&nbsp;test u&nbsp;test v&nbsp;test z&nbsp;test A&nbsp;test I&nbsp;test K&nbsp;test O&nbsp;test S&nbsp;test U&nbsp;test V&nbsp;test Z&nbsp;test&nbsp;– test&nbsp;– test</p>
+<h2 id="test-with-numbers">Test with numbers</h2>
+<p>Test&nbsp;19 test “19” test</p>
+<h2 id="test-of-double-prefixes.">Test of double prefixes.</h2>
+<p>A&nbsp;i&nbsp;test, i&nbsp;v&nbsp;test, a&nbsp;k&nbsp;test, a&nbsp;v&nbsp;test.</p>
+<h2 id="test-of-block-code">Test of block code</h2>
+<pre><code>a = 5
+k = &quot;test&quot;</code></pre>
+<h2 id="test-of-inline-code">Test of inline code</h2>
+<p>Test <code>a = 5</code> test</p>
@@ -0,0 +1,12 @@
+<h1 id="tests">Tests</h1>
+<h2 id="basic-test">Basic test</h2>
+<p>a&nbsp;test i test A&nbsp;test I&nbsp;test the&nbsp;test The&nbsp;test&nbsp;– test&nbsp;– test</p>
+<h2 id="test-with-numbers">Test with numbers</h2>
+<p>Test&nbsp;19 test “19” test</p>
+<h2 id="test-of-double-prefixes.">Test of double prefixes.</h2>
+<p>A&nbsp;i test, i v test, a&nbsp;k test, a&nbsp;v test.</p>
+<h2 id="test-of-block-code">Test of block code</h2>
+<pre><code>a = 5
+k = &quot;test&quot;</code></pre>
+<h2 id="test-of-inline-code">Test of inline code</h2>
+<p>Test <code>a = 5</code> test</p>
@@ -0,0 +1,6 @@
+# Command used to compare the filter output with the expected file. `?=` only
+# assigns when DIFF is not already set, so it can be overridden by the caller.
+DIFF ?= diff --strip-trailing-cr -u
+
+# Run the filter over both sample documents and pipe each result into $(DIFF),
+# which compares it (read from stdin, `-`) with the matching expected HTML file.
+test:
+	@pandoc --lua-filter=pandocVlna.lua sampleCZ.md | $(DIFF) expectedCZ.html -
+	@pandoc --lua-filter=pandocVlna.lua sampleEN.md | $(DIFF) expectedEN.html -
+.PHONY: test
@@ -0,0 +1,202 @@
+local utils = require 'pandoc.utils'
+local stringify = utils.stringify
+
+--[[
+Indexed tables of short words after which the following space is replaced by
+a non-breakable one. Verbose, but can be changed per user requirements.
+
+* `prefixesEN` - words used for English documents.
+* `prefixesCZ` - Czech one-letter prepositions and conjunctions, lower and
+  upper case.
+* `prefixes`   - the table currently in effect. It is declared exactly once,
+  so the functions below and `Meta` all refer to the same variable; `Meta`
+  re-points it according to the `lang` metadata value.
+--]]
+
+local prefixesEN = {
+  'I',
+  'a',
+  'A',
+  'the',
+  'The'
+}
+
+local prefixesCZ = {
+  'a',
+  'i',
+  'k',
+  'o',
+  's',
+  'u',
+  'v',
+  'z',
+  'A',
+  'I',
+  'K',
+  'O',
+  'S',
+  'U',
+  'V',
+  'Z'
+}
+
+-- Active table; holds the Czech list until `Meta` selects one explicitly.
+local prefixes = prefixesCZ
+
+--[[
+Set `prefixes` according to the `lang` metadata value.
+
+* A value of `cs`, or one starting with `cs-` (for example `cs-CZ`), selects
+  `prefixesCZ`.
+* Any other value, or a missing `lang`, selects `prefixesEN`.
+
+The function deliberately returns nothing: a returned value would be taken by
+pandoc as the replacement for the document metadata.
+--]]
+function Meta(meta)
+  -- `stringify` always yields a string, so the `and ... or` default is safe
+  local lang = meta.lang and stringify(meta.lang) or ''
+
+  if lang == 'cs' or lang:match('^cs%-') then
+    prefixes = prefixesCZ
+  else
+    prefixes = prefixesEN -- default to English prefixes
+  end
+end
+
+--[[
+Dash tokens in front of which a non-breakable space is inserted. Some
+languages (Czech among them) require a non-breakable space *before* a long
+dash. The first entry is the two-hyphen spelling, the second the en-dash
+character itself.
+--]]
+
+local dashes = { '--', '–' }
+
+--[[
+Replacement markup for the non-breakable space, keyed by output format name
+--]]
+
+local nonbreakablespaces = {
+  ['html'] = '&nbsp;',
+  ['latex'] = '~',
+  ['context'] = '~',
+}
+
+--[[
+Tell whether `my_string` is exactly equal to one of the entries of the
+`prefixes` table, i.e. a word after which a non-breakable space is inserted.
+
+The table is walked in order and the walk stops at the first equal entry.
+
+* Returns `true` when an equal entry exists.
+* Returns `false` when the whole table was walked without a hit.
+--]]
+
+function find_one_letter_prefix(my_string)
+  local found = false
+  for _, candidate in ipairs(prefixes) do
+    if candidate == my_string then
+      found = true
+      break
+    end
+  end
+  return found
+end
+
+--[[
+Tell whether `my_dash` is exactly equal to one of the entries of the `dashes`
+table, i.e. a token in front of which a non-breakable space is inserted.
+
+* Returns `true` as soon as an equal entry is met.
+* Returns `false` once every entry has been compared without a hit.
+--]]
+
+function find_dashes(my_dash)
+  for _, known_dash in ipairs(dashes) do
+    local is_same = (known_dash == my_dash)
+    if is_same then
+      return true
+    end
+  end
+
+  return false
+end
+
+--[[
+Function to determine the Space element replacement for a non-breakable space
+according to the output format.
+
+* `format` - name of the output format (the filter passes pandoc's `FORMAT`).
+  A missing value is treated as an unknown format.
+
+Returns:
+
+* a raw HTML inline holding `nonbreakablespaces.html` when the name contains
+  `html` (so `html`, `html4` and `html5` are all covered),
+* a raw TeX inline holding `nonbreakablespaces.latex` when it contains `latex`,
+* a raw TeX inline holding `nonbreakablespaces.context` when it contains
+  `context`,
+* otherwise a Str holding the U+00A0 NO-BREAK SPACE character.
+--]]
+
+function insert_nonbreakable_space(format)
+  format = format or ''
+
+  -- plain (non-pattern) substring searches; the names contain no magic
+  -- characters today, but plain mode keeps that from ever mattering
+  if format:find('html', 1, true) then
+    return pandoc.RawInline('html', nonbreakablespaces.html)
+  elseif format:find('latex', 1, true) then
+    return pandoc.RawInline('tex', nonbreakablespaces.latex)
+  elseif format:find('context', 1, true) then
+    return pandoc.RawInline('tex', nonbreakablespaces.context)
+  else
+    --fallback to inserting non-breakable space unicode symbol
+    return pandoc.Str '\u{a0}'
+  end
+end
+
+--[[
+Helper of `Inlines` for Str elements that were not fully parsed, i.e. whose
+text still contains whitespace (they might be products of other filters that
+were executed before this one).
+
+Every run of whitespace that follows a word from `prefixes` is replaced by the
+U+00A0 NO-BREAK SPACE character. A leading `„` and a trailing `“` (as
+introduced by the "quotation.lua" filter) are ignored when the word is looked
+up. The word must itself be preceded by whitespace, so a word at the very
+start of the text is left alone.
+
+The character is written into the text directly because a Str cannot hold a
+raw inline; how it is rendered is left to the output writer.
+
+* `text` - content of the Str element.
+
+Returns the text with the replacements applied.
+--]]
+local function glue_prefixes_in_text(text)
+  local glued = text:gsub('()(%S+)%s+', function (position, word)
+    if position == 1 then
+      return nil -- no whitespace in front of the word: keep the match as is
+    end
+
+    -- the quotes are multibyte, so they are stripped as literal sequences
+    -- instead of being put into a (byte based) character class
+    local bare = word:gsub('^„', ''):gsub('“$', '')
+    if find_one_letter_prefix(bare) then
+      return word .. '\u{a0}'
+    end
+    return nil
+  end)
+  return glued
+end
+
+--[[
+Core filter function:
+
+* It iterates over all inline elements in a block.
+* A Space element is replaced by the non-breakable space of the output format
+  (see `insert_nonbreakable_space`) when
+    * the element in front of it is a Str equal to one of the `prefixes`, or
+    * the element behind it is a Str equal to one of the `dashes`, or
+    * the element behind it is a Str containing at least one digit.
+  A Space at the very start or end of the list has only one neighbour; the
+  missing one is simply not checked.
+* A Str element is passed through `glue_prefixes_in_text` to catch prefixes
+  inside not fully parsed strings.
+* Returns the modified list of inlines.
+--]]
+
+function Inlines (inlines)
+
+  --variable holding replacement value for the non-breakable space
+  local insert = insert_nonbreakable_space(FORMAT)
+
+  for i = 1, #inlines do
+    local current = inlines[i]
+
+    if current.t == 'Space' then
+      -- either neighbour is nil when the Space sits at an end of the list
+      local before, after = inlines[i - 1], inlines[i + 1]
+
+      local after_prefix = before ~= nil
+        and before.t == 'Str'
+        and find_one_letter_prefix(before.text)
+
+      local before_dash_or_number = after ~= nil
+        and after.t == 'Str'
+        and (find_dashes(after.text) or after.text:find('%d') ~= nil)
+
+      if after_prefix or before_dash_or_number then
+        inlines[i] = insert
+      end
+
+    elseif current.t == 'Str' then
+      current.text = glue_prefixes_in_text(current.text)
+    end
+  end
+
+  return inlines
+end
+
+-- Each table below is handed to pandoc as a filter of its own; listing them
+-- separately should make the functions run in the order Meta - Inlines.
+local meta_pass = { Meta = Meta }
+local inlines_pass = { Inlines = Inlines }
+
+return { meta_pass, inlines_pass }
@@ -0,0 +1,28 @@
+---
+lang: cs
+---
+
+# Tests
+
+## Basic test
+
+a test i test k test o test s test u test v test z test A test I test K test O test S test U test V test Z test -- test – test
+
+## Test with numbers
+
+Test 19 test "19" test
+
+## Test of double prefixes.
+
+A i test, i v test, a k test, a v test.
+
+## Test of block code
+
+```
+a = 5
+k = "test"
+```
+
+## Test of inline code
+
+Test `a = 5` test
@@ -0,0 +1,28 @@
+---
+lang: en
+---
+
+# Tests
+
+## Basic test
+
+a test i test A test I test the test The test -- test – test
+
+## Test with numbers
+
+Test 19 test "19" test
+
+## Test of double prefixes.
+
+A i test, i v test, a k test, a v test.
+
+## Test of block code
+
+```
+a = 5
+k = "test"
+```
+
+## Test of inline code
+
+Test `a = 5` test