home · contact · privacy
Stabilize feed.xml generation, reduce bad shell scripting.
author Christian Heller <c.heller@plomlompom.de>
Sat, 10 Dec 2016 22:55:21 +0000 (23:55 +0100)
committer Christian Heller <c.heller@plomlompom.de>
Sat, 10 Dec 2016 22:55:21 +0000 (23:55 +0100)
README.md
processor/feed.xml.do
test.sh
test/test_files/feed.xml.ignoring
test/test_files/index.html
test/test_files/test.html
test/test_files/test.md

index 92047da94629eb39e998266eea9d46180a276f54..e3ced3c39d81ff69d2f6c3f3bef59453a46da4dc 100644 (file)
--- a/README.md
+++ b/README.md
@@ -33,9 +33,8 @@ customize the blog: ./url, ./author, ./uuid, ./title.)
 bugs
 ----
 
-Due to bad shell scripting in ./processor/index.html.do and
-./processor/feed.xml.do (see the FIXME notes there), source files whose names
-contain "$" break the redo processing.
+Due to bad shell scripting in ./processor/index.html.do (see the FIXME notes
+there), source files whose names contain "$" break the redo processing.
 
 Oh, and don't create a index.rst or index.md in the redo-managed directory,
 that will also break things.
index 1e492ebc47f8b4e7f1f2eb25e8598c0d985fd5ab..001024020b2da6f777c767b3fa26b9dcd7fae740 100644 (file)
@@ -1,5 +1,29 @@
 #!/bin/sh
 
+build_entry () {  # Print one Atom <entry> element for source file $1 to stdout.
+  file="${1}"  # Source file the entry is built from.
+  uuid_file="${2}"  # File holding the entry UUID; must carry the name read below.
+  published="${3}"  # Publication date, handed to "date -d" further down.
+  intermediate_file="${file%.*}.intermediate"  # Same path, extension swapped.
+  htmlfile=`escape_url "${file%.*}.html"`
+  redo-ifchange "$uuid_file"
+  redo-ifchange "$intermediate_file"
+  lastmod=`stat -c%y "$file"`  # Modification time of the source file.
+  lastmod_rfc3339=`date -u "+%Y-%m-%dT%TZ" -d "$lastmod"`
+  title=`read_and_escape_file "$intermediate_file" | head -1`  # First line only.
+  uuid=`read_and_escape_file "$uuid_file" | head -1`
+  body=`read_and_escape_file "$intermediate_file" | sed 1d`  # All but the first line.
+  published_rfc3339=`date -u "+%Y-%m-%dT%TZ" -d "${published}"`
+  printf "<entry>\n"
+  printf "<title type=\"html\">%s</title>\n" "$title"
+  printf "<id>urn:uuid:%s</id>\n" "$uuid"
+  printf "<updated>%s</updated>\n" "$lastmod_rfc3339"
+  printf "<published>%s</published>\n" "$published_rfc3339"
+  printf "<link href=\"%s%s\" />\n" "$basepath" "${htmlfile#\./}"  # basepath is not set in this function; a leading "./" is stripped.
+  printf "<content type=\"html\">\n%s\n</content>\n" "$body"
+  printf "</entry>"  # No trailing newline is emitted here.
+}
+
 # Pull in global dependencies.
 . ./helpers.sh
 url_file=url.meta
@@ -33,44 +57,36 @@ printf "<title type=\"html\">%s</title>\n" "$title"
 printf "<author><name>%s</name></author>\n" "$author"
 printf "<id>urn:uuid:%s</id>\n" "$uuid"
 
-# Iterate through most recent entries (go by lastmod date of source files) to
-# build feed head "updated" element, and individual entries.
-# FIXME: This ls parsing is a bad way to loop through the sorted files. Besides,
-# $'\0' is a bashism.
-first_run=0
-files=`ls -1t *.rst *.md | head -10 | tr '\n' $'\0'`
-oldIFS="$IFS"
-IFS=$'\0'
-for file in $files; do
-  lastmod=`stat -c%y "$file"`
-  lastmod_rfc3339=`date -u "+%Y-%m-%dT%TZ" -d "$lastmod"`
-  if [ "$first_run" -lt "1" ]; then
-    IFS="$oldIFS"
-    printf "<updated>%s</updated>\n\n" "$lastmod_rfc3339" 
-    first_run=1
+# Generate feed entry snippets: one file per source file, named by publication timestamp.
+mkdir -p feed_snippets
+for file in ./*.rst ./*.md; do
+  if [ -e "$file" ]; then  # Skips a pattern that matched nothing and was left as literal text.
+    uuid_file="${file%.*}.uuid"
+    redo-ifchange "$uuid_file"
+    published=`stat -c%y "${uuid_file}"`  # Modification time of the .uuid file is used as publication date.
+    published_unix=$(date -u "+%s%N" -d "${published}")  # Epoch seconds directly followed by nanoseconds.
+    entry=$(build_entry "${file}" "${uuid_file}" "${published}")
+    echo "${entry}" > ./feed_snippets/${published_unix}  # Snippet file name is the timestamp computed above.
   fi
+done
 
-  # Build some variables and dependencies.
-  intermediate_file="${file%.*}.intermediate"
-  htmlfile=`escape_url "${file%.*}.html"`
-  uuid_file="${file%.*}.uuid"
-  redo-ifchange "$intermediate_file"
-  redo-ifchange "$uuid_file"
-  title=`read_and_escape_file "$intermediate_file" | head -1`
-  uuid=`read_and_escape_file "$uuid_file" | head -1`
-  body=`read_and_escape_file "$intermediate_file" | sed 1d`
-  published=`stat -c%y "$uuid_file"`
-  published_rfc3339=`date -u "+%Y-%m-%dT%TZ" -d "$published"`
+# Derive feed modification date from snippets: the newest <updated> value wins.
+mod_dates=$(grep -hE "^<updated>" ./feed_snippets/* | sed -E 's/<.?updated>//g')  # Keep only the text between the tags.
+last_mod_unix=0  # Stays 0 (the epoch) if no timestamp is found.
+for date in $mod_dates; do  # Unquoted, so the list splits into one word per timestamp.
+  date_unix=$(date -u "+%s" -d "${date}")  # Convert to epoch seconds for the integer comparison.
+  if [ "$date_unix" -gt "$last_mod_unix" ]; then
+    last_mod_unix=$date_unix
+  fi
+done
+lastmod_rfc3339=`date -u "+%Y-%m-%dT%TZ" -d "@${last_mod_unix}"`  # "@N" makes date read N as epoch seconds.
+printf "<updated>%s</updated>\n\n" "$lastmod_rfc3339"
 
-  # Write entry.
-  printf "<entry>\n"
-  printf "<title type=\"html\">%s</title>\n" "$title"
-  printf "<id>urn:uuid:%s</id>\n" "$uuid" 
-  printf "<updated>%s</updated>\n" "$lastmod_rfc3339" 
-  printf "<published>%s</published>\n" "$published_rfc3339" 
-  printf "<link href=\"%s%s\" />\n" "$basepath" "$htmlfile"
-  printf "<content type=\"html\">\n%s\n</content>\n" "$body"
-  printf "</entry>\n\n"
+# Write feed entries, in the order the glob below yields the snippet files.
+for file in ./feed_snippets/*; do  # NOTE(review): name order equals time order only while all names have the same digit count - confirm.
+  cat "${file}"
+  printf "\n"  # Blank line after each entry.
 done
+rm -rf feed_snippets  # The snippet directory is removed once its contents have been printed.
 
 printf "</feed>"
diff --git a/test.sh b/test.sh
index e333c4059294e1debc672de7266220a2869a6749..d930e6697d3763bb827eb6f254307b0693a59920 100755 (executable)
--- a/test.sh
+++ b/test.sh
@@ -32,12 +32,14 @@ generated_files_dir_escaped="test\\/test_dir"
 rm -rf "$generated_files_dir" 
 ./add_dir.sh "$generated_files_dir" 
 working_dir=$(pwd)
-cp "$expected_files_dir"/test.md "$generated_files_dir"/
-cp "$expected_files_dir"/foo.rst "$generated_files_dir"/
-cp "$expected_files_dir"/bar\ baz.md "$generated_files_dir"/
 cd "$generated_files_dir"
+cp "$working_dir/$expected_files_dir"/test.md .  # Add the first source file, then build.
 redo
+cp "$working_dir/$expected_files_dir"/bar\ baz.md .  # Second source file, followed by its own redo run.
+redo
+cp "$working_dir/$expected_files_dir"/foo.rst .  # Third source file. NOTE(review): the staggered copies presumably give each entry a distinct publication time so the feed order is reproducible - confirm.
+redo
+cd "$working_dir"  # Return to the directory recorded in working_dir before the cd above.
 
 # Simple file comparison tests and UUID tests.
 uuid_test "$generated_files_dir""/uuid.meta"
index 039a1163d1adf3c0451606006d80535eec08946d..0d6ea0d4d9112db220ec32d2c88ccb4caaadaa3a 100644 (file)
@@ -8,36 +8,36 @@
 <updated>IGNORE</updated>
 
 <entry>
-<title type="html">foo</title>
+<title type="html">foo &lt;em&gt;bar&lt;/em&gt; &lt;strong&gt;baz&lt;/strong&gt;</title>
 <id>urn:uuid:IGNORE</id>
 <updated>IGNORE</updated>
 <published>IGNORE</published>
-<link href="http://example.org/bar%20baz.html" />
+<link href="http://example.org/test.html" />
 <content type="html">
-&lt;p&gt;bar&lt;/p&gt;
+&lt;p&gt;foo&lt;/p&gt;
 </content>
 </entry>
 
 <entry>
-<title type="html">a title with some nasty characters: &amp;amp;&amp;lt;&amp;gt;&amp;quot;&#x27;</title>
+<title type="html">foo</title>
 <id>urn:uuid:IGNORE</id>
 <updated>IGNORE</updated>
 <published>IGNORE</published>
-<link href="http://example.org/foo.html" />
+<link href="http://example.org/bar%20baz.html" />
 <content type="html">
-&lt;p&gt;this text contains some special characters: &#x27;&amp;quot;&amp;gt;&amp;lt;&amp;amp;&lt;/p&gt;
-&lt;p&gt;and more than one paragraph&lt;/p&gt;
+&lt;p&gt;bar&lt;/p&gt;
 </content>
 </entry>
 
 <entry>
-<title type="html">foo &lt;em&gt;bar&lt;/em&gt; &lt;strong&gt;baz&lt;/strong&gt;</title>
+<title type="html">a title with some nasty characters: &amp;amp;&amp;lt;&amp;gt;&amp;quot;&#x27;</title>
 <id>urn:uuid:IGNORE</id>
 <updated>IGNORE</updated>
 <published>IGNORE</published>
-<link href="http://example.org/test.html" />
+<link href="http://example.org/foo.html" />
 <content type="html">
-&lt;p&gt;bar&lt;/p&gt;
+&lt;p&gt;this text contains some special characters: &#x27;&amp;quot;&amp;gt;&amp;lt;&amp;amp;&lt;/p&gt;
+&lt;p&gt;and more than one paragraph&lt;/p&gt;
 </content>
 </entry>
 
index f5b755c5370dc43a371d21714297d3f0e0c7ff2a..3210fef4d4d8597c80a4201b516fe1ee207c7aa4 100644 (file)
@@ -6,8 +6,8 @@
 <body>
 <h1>Yet another blog</h1>
 <ul>
-<li><a href="bar%20baz.html" />foo</a></li>
 <li><a href="foo.html" />a title with some nasty characters: &amp;&lt;&gt;&quot;'</a></li>
+<li><a href="bar%20baz.html" />foo</a></li>
 <li><a href="test.html" />foo <em>bar</em> <strong>baz</strong></a></li>
 </ul>
 </body>
index 787bf0e52f1dc326000b851d18376f9748ecfcbc..565355d34971942d0618dde487737528d0f8aa80 100644 (file)
@@ -6,7 +6,7 @@
 <body>
 <h1>foo <em>bar</em> <strong>baz</strong></h1>
 <section>
-<p>bar</p>
+<p>foo</p>
 </section>
 </body>
 </html>
\ No newline at end of file
index 3dd09f76189684642223e77ed1e02f1e090db21c..4416db16f80d5ee279652e1a598bf295ac656263 100644 (file)
@@ -1,2 +1,2 @@
 % foo *bar* **baz**
-bar
+foo