From f7676bb9cabab8d408d220051e4857c054a55efd Mon Sep 17 00:00:00 2001
From: Christian Heller <c.heller@plomlompom.de>
Date: Sat, 10 Dec 2016 23:55:21 +0100
Subject: [PATCH] Stabilize feed.xml generation, reduce bad shell scripting.

---
 README.md                         |  5 +-
 processor/feed.xml.do             | 86 ++++++++++++++++++-------------
 test.sh                           | 10 ++--
 test/test_files/feed.xml.ignoring | 20 +++----
 test/test_files/index.html        |  2 +-
 test/test_files/test.html         |  2 +-
 test/test_files/test.md           |  2 +-
 7 files changed, 72 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index 92047da..e3ced3c 100644
--- a/README.md
+++ b/README.md
@@ -33,9 +33,8 @@ customize the blog: ./url, ./author, ./uuid, ./title.)
 bugs
 ----
 
-Due to bad shell scripting in ./processor/index.html.do and
-./processor/feed.xml.do (see the FIXME notes there), source files whose names
-contain "$" break the redo processing.
+Due to bad shell scripting in ./processor/index.html.do (see the FIXME notes
+there), source files whose names contain "$" break the redo processing.
 
 Oh, and don't create a index.rst or index.md in the redo-managed directory,
 that will also break things.
diff --git a/processor/feed.xml.do b/processor/feed.xml.do
index 1e492eb..0010240 100644
--- a/processor/feed.xml.do
+++ b/processor/feed.xml.do
@@ -1,5 +1,29 @@
 #!/bin/sh
 
+# build_entry FILE UUID_FILE PUBLISHED -- print one Atom <entry> for FILE.
+# UUID_FILE supplies the entry id; PUBLISHED is a date(1)-parsable timestamp.
+# Reads the global $basepath; calls escape_url and read_and_escape_file.
+build_entry () {
+  file="${1}"
+  uuid_file="${2}"
+  published="${3}"
+  intermediate_file="${file%.*}.intermediate"
+  htmlfile=$(escape_url "${file%.*}.html")
+  redo-ifchange "$uuid_file" "$intermediate_file"
+  lastmod_rfc3339=$(date -u "+%Y-%m-%dT%TZ" -d "$(stat -c%y "$file")")
+  # First line of the intermediate file is the title, the rest is the body.
+  title=$(read_and_escape_file "$intermediate_file" | head -1)
+  uuid=$(read_and_escape_file "$uuid_file" | head -1)
+  body=$(read_and_escape_file "$intermediate_file" | sed 1d)
+  published_rfc3339=$(date -u "+%Y-%m-%dT%TZ" -d "${published}")
+  printf "<entry>\n<title type=\"html\">%s</title>\n" "$title"
+  printf "<id>urn:uuid:%s</id>\n" "$uuid"
+  printf "<updated>%s</updated>\n" "$lastmod_rfc3339"
+  printf "<published>%s</published>\n" "$published_rfc3339"
+  printf "<link href=\"%s%s\" />\n" "$basepath" "${htmlfile#\./}"
+  printf "<content type=\"html\">\n%s\n</content>\n</entry>" "$body"
+}
+
 # Pull in global dependencies.
 . ./helpers.sh
 url_file=url.meta
@@ -33,44 +57,36 @@ printf "<title type=\"html\">%s</title>\n" "$title"
 printf "<author><name>%s</name></author>\n" "$author"
 printf "<id>urn:uuid:%s</id>\n" "$uuid"
 
-# Iterate through most recent entries (go by lastmod date of source files) to
-# build feed head "updated" element, and individual entries.
-# FIXME: This ls parsing is a bad way to loop through the sorted files. Besides,
-# $'\0' is a bashism.
-first_run=0
-files=`ls -1t *.rst *.md | head -10 | tr '\n' $'\0'`
-oldIFS="$IFS"
-IFS=$'\0'
-for file in $files; do
-  lastmod=`stat -c%y "$file"`
-  lastmod_rfc3339=`date -u "+%Y-%m-%dT%TZ" -d "$lastmod"`
-  if [ "$first_run" -lt "1" ]; then
-    IFS="$oldIFS"
-    printf "<updated>%s</updated>\n\n" "$lastmod_rfc3339" 
-    first_run=1
+# Generate one feed entry snippet per source file, named by .uuid mtime (ns).
+rm -rf feed_snippets && mkdir -p feed_snippets  # drop leftovers of aborted runs
+for file in ./*.rst ./*.md; do
+  if [ -e "$file" ]; then  # skip the literal pattern when a glob matches nothing
+    uuid_file="${file%.*}.uuid"
+    redo-ifchange "$uuid_file"
+    published=$(stat -c%y "$uuid_file")
+    published_unix=$(date -u "+%s%N" -d "$published")
+    entry=$(build_entry "$file" "$uuid_file" "$published")
+    printf '%s\n' "$entry" > "./feed_snippets/${published_unix}"
   fi
+done
 
-  # Build some variables and dependencies.
-  intermediate_file="${file%.*}.intermediate"
-  htmlfile=`escape_url "${file%.*}.html"`
-  uuid_file="${file%.*}.uuid"
-  redo-ifchange "$intermediate_file"
-  redo-ifchange "$uuid_file"
-  title=`read_and_escape_file "$intermediate_file" | head -1`
-  uuid=`read_and_escape_file "$uuid_file" | head -1`
-  body=`read_and_escape_file "$intermediate_file" | sed 1d`
-  published=`stat -c%y "$uuid_file"`
-  published_rfc3339=`date -u "+%Y-%m-%dT%TZ" -d "$published"`
+# Feed-level <updated>: take every entry snippet's <updated> stamp and keep
+# the most recent one (compared as epoch seconds); stays at epoch 0 if none.
+entry_stamps=$(grep -hE "^<updated>" ./feed_snippets/* | sed -E 's/<.?updated>//g')
+newest_unix=0
+for stamp in $entry_stamps; do  # unquoted on purpose: one stamp per word
+  stamp_unix=$(date -u "+%s" -d "$stamp")
+  if [ "$stamp_unix" -gt "$newest_unix" ]; then newest_unix=$stamp_unix; fi
+done
+# Convert the winning epoch value back to an RFC 3339 UTC timestamp.
+feed_updated=$(date -u "+%Y-%m-%dT%TZ" -d "@${newest_unix}")
+printf "<updated>%s</updated>\n\n" "$feed_updated"
 
-  # Write entry.
-  printf "<entry>\n"
-  printf "<title type=\"html\">%s</title>\n" "$title"
-  printf "<id>urn:uuid:%s</id>\n" "$uuid" 
-  printf "<updated>%s</updated>\n" "$lastmod_rfc3339" 
-  printf "<published>%s</published>\n" "$published_rfc3339" 
-  printf "<link href=\"%s%s\" />\n" "$basepath" "$htmlfile"
-  printf "<content type=\"html\">\n%s\n</content>\n" "$body"
-  printf "</entry>\n\n"
+# Write feed entries in glob order of the timestamp-named snippet files.
+for file in ./feed_snippets/*; do
+  [ -e "$file" ] || continue  # unmatched glob stays literal: nothing to write
+  cat "$file"; printf "\n"
 done
+rm -rf feed_snippets
 
 printf "</feed>"
diff --git a/test.sh b/test.sh
index e333c40..d930e66 100755
--- a/test.sh
+++ b/test.sh
@@ -32,12 +32,14 @@ generated_files_dir_escaped="test\\/test_dir"
 rm -rf "$generated_files_dir" 
 ./add_dir.sh "$generated_files_dir" 
 working_dir=$(pwd)
-cp "$expected_files_dir"/test.md "$generated_files_dir"/
-cp "$expected_files_dir"/foo.rst "$generated_files_dir"/
-cp "$expected_files_dir"/bar\ baz.md "$generated_files_dir"/
 cd "$generated_files_dir"
+cp "$working_dir/$expected_files_dir"/test.md .
 redo
-cd "$working_dir" 
+cp "$working_dir/$expected_files_dir"/bar\ baz.md .  # second source file
+redo  # rebuild after each copy; presumably so files are processed in a fixed order -- confirm
+cp "$working_dir/$expected_files_dir"/foo.rst .  # third source file
+redo  # final rebuild with all three source files present
+cd "$working_dir"  # return to the directory saved in working_dir
 
 # Simple file comparison tests and UUID tests.
 uuid_test "$generated_files_dir""/uuid.meta"
diff --git a/test/test_files/feed.xml.ignoring b/test/test_files/feed.xml.ignoring
index 039a116..0d6ea0d 100644
--- a/test/test_files/feed.xml.ignoring
+++ b/test/test_files/feed.xml.ignoring
@@ -8,36 +8,36 @@
 <updated>IGNORE</updated>
 
 <entry>
-<title type="html">foo</title>
+<title type="html">foo &lt;em&gt;bar&lt;/em&gt; &lt;strong&gt;baz&lt;/strong&gt;</title>
 <id>urn:uuid:IGNORE</id>
 <updated>IGNORE</updated>
 <published>IGNORE</published>
-<link href="http://example.org/bar%20baz.html" />
+<link href="http://example.org/test.html" />
 <content type="html">
-&lt;p&gt;bar&lt;/p&gt;
+&lt;p&gt;foo&lt;/p&gt;
 </content>
 </entry>
 
 <entry>
-<title type="html">a title with some nasty characters: &amp;amp;&amp;lt;&amp;gt;&amp;quot;&#x27;</title>
+<title type="html">foo</title>
 <id>urn:uuid:IGNORE</id>
 <updated>IGNORE</updated>
 <published>IGNORE</published>
-<link href="http://example.org/foo.html" />
+<link href="http://example.org/bar%20baz.html" />
 <content type="html">
-&lt;p&gt;this text contains some special characters: &#x27;&amp;quot;&amp;gt;&amp;lt;&amp;amp;&lt;/p&gt;
-&lt;p&gt;and more than one paragraph&lt;/p&gt;
+&lt;p&gt;bar&lt;/p&gt;
 </content>
 </entry>
 
 <entry>
-<title type="html">foo &lt;em&gt;bar&lt;/em&gt; &lt;strong&gt;baz&lt;/strong&gt;</title>
+<title type="html">a title with some nasty characters: &amp;amp;&amp;lt;&amp;gt;&amp;quot;&#x27;</title>
 <id>urn:uuid:IGNORE</id>
 <updated>IGNORE</updated>
 <published>IGNORE</published>
-<link href="http://example.org/test.html" />
+<link href="http://example.org/foo.html" />
 <content type="html">
-&lt;p&gt;bar&lt;/p&gt;
+&lt;p&gt;this text contains some special characters: &#x27;&amp;quot;&amp;gt;&amp;lt;&amp;amp;&lt;/p&gt;
+&lt;p&gt;and more than one paragraph&lt;/p&gt;
 </content>
 </entry>
 
diff --git a/test/test_files/index.html b/test/test_files/index.html
index f5b755c..3210fef 100644
--- a/test/test_files/index.html
+++ b/test/test_files/index.html
@@ -6,8 +6,8 @@
 <body>
 <h1>Yet another blog</h1>
 <ul>
-<li><a href="bar%20baz.html" />foo</a></li>
 <li><a href="foo.html" />a title with some nasty characters: &amp;&lt;&gt;&quot;'</a></li>
+<li><a href="bar%20baz.html" />foo</a></li>
 <li><a href="test.html" />foo <em>bar</em> <strong>baz</strong></a></li>
 </ul>
 </body>
diff --git a/test/test_files/test.html b/test/test_files/test.html
index 787bf0e..565355d 100644
--- a/test/test_files/test.html
+++ b/test/test_files/test.html
@@ -6,7 +6,7 @@
 <body>
 <h1>foo <em>bar</em> <strong>baz</strong></h1>
 <section>
-<p>bar</p>
+<p>foo</p>
 </section>
 </body>
 </html>
\ No newline at end of file
diff --git a/test/test_files/test.md b/test/test_files/test.md
index 3dd09f7..4416db1 100644
--- a/test/test_files/test.md
+++ b/test/test_files/test.md
@@ -1,2 +1,2 @@
 % foo *bar* **baz**
-bar
+foo
-- 
2.30.2