#!/usr/bin/env bash
# Copyright (C) 2013-2016 Luke Shumaker <lukeshu@sbcglobal.net>
#
# License: GNU GPLv2+
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Gettext domain (and catalog location) used for this program's own
# messages.
export TEXTDOMAIN='librelib' TEXTDOMAINDIR='/usr/share/locale'

# Default "simple" keywords, already in the form of xgettext
# `--keyword=` options.  A `:N` / `:N,M` suffix is a GNU xgettext
# keyword spec selecting which argument(s) hold the message.
readonly -a default_simple=(
	--keyword=eval_gettext
	--keyword=eval_ngettext:1,2
	--keyword=gettext
	--keyword=ngettext:1,2
	--keyword=_
	--keyword=print
	--keyword=term_title
	--keyword=msg
	--keyword=msg2
	--keyword=warning
	--keyword=error
	--keyword=stat_busy
	--keyword=die
	--keyword=lock:3
	--keyword=slock:3
)

# Default "prose" keywords; main() pipes the messages extracted for
# these through whitespace-collapse.
readonly -a default_prose=(
	--keyword=prose
	--keyword=bullet
)

# When no `gettext` command of any kind is available, define a
# stand-in that performs no translation and simply echoes its
# arguments back.
type gettext >/dev/null 2>&1 || {
	gettext() {
		echo "$@"
	}
}

# Usage: errusage [FMT [ARGS...]]
#
# Report a command-line usage error.  If FMT is given, it is passed
# through gettext and then used as a printf format for ARGS; the
# resulting message is prefixed with the program name.  The usage text
# is printed afterwards.
#
# Everything is written to stderr, so that diagnostics can never end
# up in the generated .pot data on stdout.
errusage() {
	if [[ $# -gt 0 ]]; then
		local fmt
		fmt="$(gettext "$1")"
		shift
		# $fmt is intentionally part of the format string (it
		# carries the %-directives for ARGS).  The program name
		# is passed as data so that a `%` or `\` in $0 cannot
		# corrupt the format.
		printf "%s: ${fmt}\n" "${0##*/}" "$@" >&2
	fi
	usage >&2
}

# Usage: usage
#
# Emit the help text for this program.  errusage calls this with
# stdout redirected to stderr.
#
# NOTE: the message strings below are deliberately kept as literal
# arguments to print/prose/bullet/flag; those names are the keywords
# this very program extracts (see default_simple, default_prose and
# xgettext-flag).
usage() {
	# print, prose, bullet and flag are not defined in this file;
	# presumably they are provided by libremessages -- verify there.
	. libremessages
	print 'Usage: %s [OPTIONS] FILES...' "${0##*/}"
	print 'Generates .pot files for programs using libremessages'
	echo
	prose 'In librexgettext, there are 2 types of keywords:'
	bullet 'simple: Simple keywords are just like normal xgettext'
	bullet 'prose: Prose keywords are similar, but the text is
	        word-wrapped'
	prose 'The keyword format is the same as in GNU xgettext.'
	echo
	prose 'The libremessages `flag` command is also handled
	       specially, and is not configurable as a keyword.'
	echo
	# "${array[*]#--keyword=}" strips the `--keyword=` prefix from
	# every element and joins the results into a single word (using
	# the first character of IFS), leaving just the keyword specs.
	prose 'The default simple keywords are: %s' "${default_simple[*]#--keyword=}"
	echo
	prose 'The default prose keywords are: %s' "${default_prose[*]#--keyword=}"
	echo
	print 'Options:'
	# Arguments to flag are (option, description) pairs.
	flag \
	    '--simple=KEYWORD' 'Look for KEYWORD as an additional simple keyword' \
	    '--prose=KEYWORD' 'Look for KEYWORD as an additional prose keyword' \
	    '-k' 'Disable using the default keywords' \
	    '-h, --help' 'Show this text'
}

# Usage: xgettext-sh [XGETTEXT_ARGS...]
#
# Run xgettext on shell sources with the options every extraction pass
# in this program shares; XGETTEXT_ARGS (keywords, `--`, file names)
# are appended unchanged.
xgettext-sh() {
	local -a base_opts=(
		--omit-header      # no PO header entry in the output
		--from-code=UTF-8  # input files are UTF-8
		-L shell           # parse the input as shell script
		-k                 # bare -k: drop xgettext's built-in keywords
		-o -               # write the result to stdout
	)
	xgettext "${base_opts[@]}" "$@"
}

# Usage: xgettext-flag [XGETTEXT_ARGS...] [--] FILES...
#
# Extract the translatable arguments of `flag` invocations in FILES and
# print them as PO entries on stdout.  Of each (flag, description)
# argument pair only the description is emitted; an argument whose
# msgid ends in a colon is treated as a heading and emitted as well.
#
# All arguments are handed to xgettext-sh unchanged.  Both stages of
# the pipeline below run in subshells, so the IFS and `set -f` changes
# made there do not affect the caller.
xgettext-flag() {
	{
		# Stage 1: Generate
		#
		# Get all of the arguments to `flag`.  Because `flag`
		# takes an arbitrary number of arguments, just iterate
		# through arg1, arg2, ... argN; until we've come up
		# empty 3 times.  Why 3?  Because each flag takes 2
		# arguments, and because we don't keep track of which
		# one of those we're on, waiting for 3 empties ensures
		# us that we've had a complete "round" with nothing.
		#
		# Why can't I just do i+=2, and not have to keep track
		# of empties?  Because, we also allow for arguments
		# ending in a colon to be headings, which changes the
		# offsets.
		#
		# The keyword spec `flag:N,"N"` makes xgettext tag each
		# message extracted from argument N with the comment
		# "#. N"; the later stages rely on that tag.
		declare -i empties=0
		declare -i i
		for (( i=1; empties < 3; i++ )); do
			local out
			out="$(xgettext-sh --keyword="flag:$i,\"$i\"" "$@")"
			if [[ -n $out ]]; then
				printf -- '%s\n' "$out"
			else
				empties+=1
			fi
		done
	} | whitespace-collapse | sed '/^\#, sh-format/d' | {
		# The unquoted expansions below are there for word
		# splitting only; turn off pathname expansion so that
		# glob characters in a message are never expanded.
		set -f

		# Stage 2: Parse
		#
		# Read in the lines, and group them into an array of
		# (multi-line) msgs.  This just makes working with
		# them easier.  A new msg starts at every "#. N" line.
		local msgs=()
		declare -i i=-1
		local re='^#\. ([0-9]+)$'
		IFS=''
		local line
		while read -r line; do
			if [[ $line =~ $re ]]; then
				i+=1
			fi
			msgs[$i]+="$line"$'\n'
		done
		# Stage 3: Sort
		#
		# Now, we have the `msgs` array, and it is
		# sorted such that it is all of the arg1's to `flag`,
		# then all of the arg2's, then all of the arg3's, and
		# so on.  We want to re-order them such that it's all
		# of the args for the first invocation then all of the
		# args for the second; and so on.
		#
		# We do this by simply sorting them by the location
		# that they appear in the file.  Then, when we see the
		# argument number go back down to 1, we know that a
		# new invocation has started!
		#
		# Each sort record is "ROW ARG INDEX", compared field by
		# field as integers.  (Sorting a single "ROW.ARG" key
		# numerically would read it as a decimal fraction and
		# put argument 10 before argument 2.)
		IFS=$'\n'
		local locations=($(
			local i
			for i in "${!msgs[@]}"; do
				declare -i arg row
				local lines=(${msgs[$i]})
				arg=${lines[0]#'#. '}   # "#. N"         -> N
				row=${lines[1]##*:}     # "#: FILE:ROW"  -> ROW
				printf '%d %d %d\n' "$row" "$arg" "$i"
			done | sort -k1,1n -k2,2n -k3,3n
		))
		# Stage 4: Output
		#
		# Now, we prune out the arguments that aren't
		# localizable.  Also, remove the "#." comment lines.
		# As explained above (in stage 3), when we see $arg go
		# to 1, that's the beginning of a new invocation.
		local expectflag=true
		local location
		for location in "${locations[@]}"; do
			IFS=' '
			local row arg i
			read -r row arg i <<<"$location"
			# Drop the leading "#. N" line.
			local msg="${msgs[$i]#*$'\n'}"
			# Now we operate based on $row, $arg, and $msg
			if [[ $arg == 1 ]]; then
				expectflag=true
			fi
			if $expectflag; then
				IFS=$'\n'
				local lines=(${msg})
				# lines[0] is the "#:" location line,
				# lines[1] the msgid line.
				if [[ ${lines[1]} == *':"' ]]; then
					# We expected a flag, but got
					# a heading
					printf -- '%s\n' "$msg"
				else
					# We expected a flag, and got
					# one!
					expectflag=false
				fi
			else
				printf -- '%s\n' "$msg"
				expectflag=true
			fi
		done
	}
}

# Usage: ... | whitespace-collapse
#
# Filter PO text from stdin to stdout: undo the line wrapping xgettext
# applies to long strings, then normalize the whitespace inside the
# messages.  Relies on GNU sed (`\r`, `\s`, `\t`, -r).
whitespace-collapse() {
	# Joins a string that was split over several "..." lines; applied
	# while the newlines are temporarily turned into carriage returns.
	local join_wrapped='s/"\r\s*"//g'
	# Literal \n and \t escapes, and real tabs, each become one space.
	local escapes_to_space='s/(\\n|\\t|\t)/ /g'
	# A run of spaces not preceded by . ! ? shrinks to one space ...
	local squeeze_inner='s/(^|[^.!? ]) +/\1 /g'
	# ... and two or more spaces after . ! ? shrink to exactly two.
	local squeeze_sentence='s/([.!?])  +/\1  /g'

	tr '\n' '\r' |
		sed "$join_wrapped" |
		tr '\r' '\n' |
		sed -r -e "$escapes_to_space" -e "$squeeze_inner" -e "$squeeze_sentence"
}

# Usage: main [OPTIONS] FILES...
#
# Parse the command line, then write the combined message catalog for
# FILES to stdout.
#
# Options:
#   --simple KEYWORD, --simple=KEYWORD   additional simple keyword
#   --prose KEYWORD,  --prose=KEYWORD    additional prose keyword
#   -k                                   do not add the default keywords
#   -h, --help                           print the usage text, return 0
#   --                                   everything after it is a file
#
# Returns 1 (after reporting through errusage) on an unrecognized
# option, an option that is missing its argument, or when no input
# file is given.  Otherwise the status is that of the final pipeline.
main() {
	local simple=()
	local prose=()
	local files=()
	local use_defaults=true
	local error=false

	# Walk the positional parameters by index ("${!i}" is argument
	# number $i) so that an option can consume the argument after it.
	local -i i
	for (( i=1; i <= $#; i++ )); do
		case "${!i}" in
			--simple)
				if (( i < $# )); then
					i+=1; simple+=(--keyword="${!i}")
				else
					errusage "option requires an argument: %s" "${!i}"; error=true
				fi;;
			--simple=*)     simple+=(--keyword="${!i#*=}");;
			--prose)
				if (( i < $# )); then
					i+=1; prose+=(--keyword="${!i}")
				else
					errusage "option requires an argument: %s" "${!i}"; error=true
				fi;;
			--prose=*)     prose+=(--keyword="${!i#*=}");;
			-k) use_defaults=false;;
			--help|-h) usage; return 0;;
			--) i+=1; break;;
			-*) errusage "unrecognized option: %s" "${!i}"; error=true;;
			*) files+=("${!i}");;
		esac
	done
	# After `--`, $i is the index of the first remaining argument;
	# if the loop ran to completion this expands to nothing.
	files+=("${@:$i}")
	if [[ ${#files[@]} -lt 1 ]]; then
		errusage "no input file given"
		error=true
	fi
	if "$error"; then
		return 1
	fi
	if "$use_defaults"; then
		simple+=("${default_simple[@]}")
		prose+=("${default_prose[@]}")
	fi

	# Main code
	#
	# Three extraction passes (simple keywords, prose keywords with
	# their whitespace collapsed, `flag` arguments) are concatenated,
	# the "#, sh-format" flag lines are removed, and msguniq merges
	# the result into a single catalog.
	{
		xgettext-sh "${simple[@]}" -- "${files[@]}"
		xgettext-sh "${prose[@]}" -- "${files[@]}" | whitespace-collapse
		xgettext-flag -- "${files[@]}"
	} | sed '/^\#, sh-format/d' | msguniq -Fi --to-code=UTF-8
}

# Script entry point: hand the command-line arguments to main.
main "$@"