kagero (5021B)
#!/bin/sh
# kagero: render webpages.
#
# usage: kagero [init|render]      (run from the root directory of the site)
#
# Directories, all relative to the current directory:
#   man/   page sources (HTML fragments plus any other files to publish)
#   pub/   rendered output
#   temp/  header.html and footer.html, wrapped around every page
#   data/  conf (sourced; may override the defaults below) and weblog
#          (one "unix-time /path" line per rendered page revision)
#
# In page sources a backslash at the end of a line joins the line with the
# next one: the backslash, the newline and the following white space (tabs
# and spaces) are deleted.  A line ending in two backslashes ends in one
# literal backslash instead.
#
# TODO: stat(1) is not in POSIX; it was removed on 7/14, but the replacement
#       (parsing "ls -l") is not yet well tested.
# Limitation: pathnames must not contain white space.
# Limitation: strftime() and mktime() in awk are not POSIX.
# BUG: non-HTML files are not copied again after their directory is changed.

man=$(pwd)/man
pub=$(pwd)/pub
temp=$(pwd)/temp
data=$(pwd)/data
weblog=$data/weblog
header="$temp/header.html"
footer="$temp/footer.html"
title="No Title"
rss_description="RSS"
rss="$pub/rss.xml"
url="https://www.example.com"
# Site configuration; it runs after the defaults so it can override them.
. "$data/conf"

# error MESSAGE: print MESSAGE on stderr and terminate the script with status 1.
error() {
  printf 'error:  %s\n' "$1" >&2
  exit 1
}

# should_exist_dir PATH: abort unless PATH is an existing directory.
should_exist_dir() {
  [ -d "$1" ] || error "$1 should exist and be a directory"
}

# should_not_exist PATH: abort if PATH exists.
should_not_exist() {
  [ ! -e "$1" ] || error "$1 should not exist"
}

# usage: print the command synopsis on stdout.
usage() {
  printf 'usage: %s [init|render]\n' "${0##*/}"
  echo ' init: initialize current directory for static site'
  echo ' render: render static pages'
}

# find_visible PRIMARIES...: run find(1) on the current directory with the
# given primaries, skipping "." itself and every path that has a component
# starting with a dot.
find_visible() {
  find . "$@" ! -name . ! -path '*/.*'
}

# sed_replacement TEXT DELIM: print TEXT with backslash, "&" and the
# one-character DELIM backslash-escaped, so that it can be spliced literally
# into the replacement part of a sed "s" command delimited by DELIM.
sed_replacement() {
  printf '%s\n' "$1" | sed 's|[\\&'"$2"']|\\&|g'
}

# first_h1 FILE: print the content of the first line of FILE that opens an
# <h1> element (with or without attributes), reduced to the text between the
# tags when the element is closed on the same line.
first_h1() {
  grep '<h1[ >]' "$1" | sed '1q' | sed 's;^.*<h1[^>]*>\(.*\)</h1>.*$;\1;'
}

# join_continued_lines: filter stdin to stdout, resolving the backslash
# line continuations described at the top of this file.
join_continued_lines() {
  # First sed: gather a run of lines ending in a backslash and delete every
  # "backslash, newline, blanks" sequence not preceded by another backslash.
  # Second sed: turn a trailing escaped backslash into a single backslash.
  sed '/\\$/{:a;N;/\\$/{$!ba;};s/\([^\]\)\\\n[[:blank:]]*/\1/g;}' |
    sed 's/\\\\$/\\/'
}

# list_files_to_replace: print the files under $man (as ./relative paths)
# that must be rendered or copied again.  When any template file is newer
# than $weblog every HTML page is listed; otherwise only the files newer
# than $weblog are.
list_files_to_replace() {
  new_temp=$(cd "$temp" && find_visible -type f -newer "$weblog")
  if [ -n "$new_temp" ]; then
    (
      cd "$man" || exit
      find_visible -type f -name '*.html'
      find_visible -type f ! -name '*.html' -newer "$weblog"
    )
  else
    (cd "$man" && find_visible -type f -newer "$weblog")
  fi
}

# render_html PAGE: write the rendered form of $man/PAGE to stdout: header,
# page and footer concatenated, continuations joined, and the first
# "<!--title-->" marker on each line replaced by the page's <h1> text.
render_html() {
  # Kept in its own variable so the site-wide $title (used for the RSS
  # channel) is never overwritten, and escaped so that "/", "&" and "\" in
  # the heading are inserted literally by sed.
  page_title=$(sed_replacement "$(first_h1 "$man/$1")" /)
  cat "$header" "$man/$1" "$footer" |
    join_continued_lines |
    sed 's/<!--title-->/'"$page_title"'/'
}

# render_sitemap: read "unix-time /path" lines on stdin and write a
# sitemaps.org <urlset> document to stdout, newest date first.
render_sitemap() {
  base=$(sed_replacement "$url" ';')
  echo '<?xml version="1.0" encoding="UTF-8"?>'
  echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  awk '{print strftime("%Y-%m-%d", $1), $2}' | # strftime(): not POSIX
    sort -r |
    sed 's! \./! /!;s!/index.html!/!' |
    sed -E 's;(^....-..-..?) (.*$);<url><loc>'"$base"'\2</loc><lastmod>\1</lastmod></url>;'
  echo '</urlset>'
}

# render_rss: read "unix-time /path" lines on stdin and write an RSS 2.0
# feed to stdout with one <item> per line, the page body as its description.
render_rss() {
  # RSS dates use English day and month names, hence LC_ALL=C.
  build_date=$(LC_ALL=C date '+%a, %e %b %Y %H:%M:%S %z')
  echo '<?xml version="1.0" encoding="UTF-8"?>'
  echo '<rss version="2.0">'
  echo '<channel>'
  printf '%s\n' '<title>'"$title"'</title>'
  printf '%s\n' '<description>'"$rss_description"'</description>'
  echo '<language>ja-jp</language>'
  printf '%s\n' '<link>'"$url"'</link>'
  printf '%s\n' '<lastBuildDate>'"$build_date"'</lastBuildDate>'
  printf '%s\n' '<pubDate>'"$build_date"'</pubDate>'
  echo '<docs>https://www.rssboard.org/rss-specification</docs>'

  while read -r stamp path; do
    item_title=$(first_h1 "$man$path")
    item_date=$(printf '%s\n' "$stamp" |
      LC_ALL=C awk '{print strftime("%a, %e %b %Y %H:%M:%S %z", $1)}') # not POSIX
    echo '<item>'
    printf '%s\n' '<title>'"$item_title"'</title>'
    printf '%s\n' '<link>'"$url$path"'</link>'
    printf '%s\n' '<guid>'"$url$path"'</guid>'
    printf '%s\n' '<pubDate>'"$item_date"'</pubDate>'
    printf '<description><![CDATA['
    join_continued_lines < "$man$path"
    echo ']]></description>'
    echo '</item>'
  done
  echo '</channel>'
  echo '</rss>'
}

# ls_files_with_unix_time: read file names (one per line, relative to the
# current directory) on stdin and print "unix-time name" for each, where
# unix-time is midnight (local time) of the file's modification date.
ls_files_with_unix_time() {
  fs=$(tr '\n' ' ')
  [ -n "$fs" ] || return 0
  # LC_ALL=C pins the "Mon DD HH:MM" / "Mon DD YYYY" date layout of ls -l.
  # Only the month field is translated, so a month abbreviation inside a
  # file name is left alone.  ls prints a time instead of the year for
  # recent files; such a file whose month is later than the current month
  # was modified last year.
  # shellcheck disable=SC2086  # splitting $fs into file names is intended
  LC_ALL=C ls -l $fs |
    awk -v Y="$(date '+%Y')" -v M="$(date '+%m')" '
      {
        i = index("JanFebMarAprMayJunJulAugSepOctNovDec", $6)
        if (NF < 9 || length($6) != 3 || i % 3 != 1) next
        month = (i + 2) / 3
        year = $8
        if (year ~ /:/) year = (month > M + 0) ? Y - 1 : Y
        print mktime(sprintf("%d %02d %02d 00 00 00", year, month, $7)), $9 # mktime(): not POSIX
      }'
}

# render_weblog: append a "unix-time /path" line to $weblog for every HTML
# page under $man (outside error/ and draft/) that is newer than $weblog, or
# for every such page when $weblog does not exist yet.
render_weblog() {
  set -- -type f -name '*.html' ! -path './error/*' ! -path './draft/*'
  if [ -f "$weblog" ]; then
    set -- "$@" -newer "$weblog"
  fi
  (cd "$man" && find_visible "$@" | ls_files_with_unix_time) |
    sort -n |
    sed 's! \./! /!' >> "$weblog"
}

# latest_entries: print the newest weblog line of every page that still
# exists under $man, newest first.
latest_entries() {
  sort -k2 -r "$weblog" | uniq -f1 | sort -k1 -r |
    while read -r stamp path; do
      if [ -f "$man$path" ]; then
        printf '%s %s\n' "$stamp" "$path"
      fi
    done
}

# prune_pub: delete from $pub everything that has no counterpart of the same
# relative path under $man.
prune_pub() {
  # -depth lists a directory after its contents, so a stale directory is
  # already empty when rmdir reaches it.
  (cd "$pub" && find . -depth ! -name .) |
    while IFS= read -r entry; do
      if [ -e "$man/$entry" ] || [ -L "$man/$entry" ]; then
        continue
      fi
      if [ -d "$pub/$entry" ] && [ ! -L "$pub/$entry" ]; then
        rmdir "$pub/$entry"
      else
        rm -f "$pub/$entry"
      fi
    done
}

# main
should_exist_dir "$man"
should_exist_dir "$pub"
should_exist_dir "$temp"
should_exist_dir "$data"

case "$1" in
  init)
    echo 'not implemented' >&2
    exit 1
    ;;
  render)
    # Mirror the directory tree first so that every output path exists.
    (cd "$man" && find_visible -type d | cpio -pdu "$pub")
    fs=$(
      if [ -f "$weblog" ]; then
        list_files_to_replace
      else
        cd "$man" && find_visible -type f
      fi
    )
    if [ -n "$fs" ]; then
      printf '%s\n' "$fs" | grep '\.html$' |
        while read -r f; do # bottleneck
          render_html "$f" > "$pub/$f"
          # Give the output the source's modification time.
          touch -r "$man/$f" "$pub/$f"
        done
      printf '%s\n' "$fs" | grep -Ev '\.html$' |
        (cd "$man" && cpio -pu "$pub")
    fi

    # Delete from $pub what was deleted from $man.  This also removes the
    # previous sitemap and feed when they live in $pub; both are rebuilt below.
    prune_pub

    render_weblog
    latest_entries |
      render_sitemap > "$pub/sitemap.xml"
    latest_entries | grep -v index.html |
      render_rss > "$rss" # bottleneck
    ;;
  *)
    usage >&2
    exit 1
    ;;
esac