#! /usr/bin/env python3

"""Extract configuration items into various configuration headers.

This uses the configitems file, a database consisting of text lines with the
following single-tab-separated fields:
 - Name of the configuration item, e.g. PQXX_HAVE_PTRDIFF_T.
 - Publication marker: public or internal.
 - A single environmental factor determining the item, e.g. libpq or compiler.
"""

from __future__ import (
    absolute_import,
    print_function,
    unicode_literals,
    )

from argparse import ArgumentParser
import codecs
from errno import ENOENT
import os.path
from os import getcwd
import re
from sys import (
    getdefaultencoding,
    getfilesystemencoding,
    stdout,
    )

# Python 2 only: make classes defined in this module new-style by default.
# Python 3 attaches no meaning to this module-level name.
__metaclass__ = type


def guess_fs_encoding():
    """Try to establish the filesystem encoding.

    It's a sad thing: some guesswork is involved.  The encoding often seems to
    be conservatively, and incorrectly, set to ascii.

    Candidates are tried in this order: the interpreter's filesystem encoding,
    its default string encoding, and finally UTF-8.  A candidate is skipped if
    it is missing (Python 2 may report `None` as the filesystem encoding) or
    if it is merely a name for ASCII.

    :return: Name of the first acceptable encoding.
    """
    ascii_names = ('ascii', 'ansi_x3.4-1968')
    candidates = [
        getfilesystemencoding(),
        getdefaultencoding(),
        'utf-8',
        ]
    for encoding in candidates:
        if not encoding:
            # No encoding reported at all; move on to the next candidate
            # rather than crash on `None.lower()`.
            continue
        if encoding.lower() not in ascii_names:
            return encoding
    # The hard-coded 'utf-8' candidate always qualifies.
    raise AssertionError("unreachable code reached.")


def guess_output_encoding():
    """Return the encoding of standard output, defaulting to ASCII."""
    encoding = stdout.encoding
    if not encoding:
        # Standard output may have no encoding set; reportedly this happens
        # for builds in Docker containers.  ASCII is the fallback.  Should
        # that ever meet a non-ASCII path, a harder decision will be needed.
        return 'ascii'
    return encoding


def decode_path(path):
    """Turn a path element given as bytes into a unicode string.

    The bytes are interpreted in the encoding that `guess_fs_encoding` picks.
    """
    fs_encoding = guess_fs_encoding()
    return path.decode(fs_encoding)


def encode_path(path):
    """Turn a path element given as a unicode string into bytes.

    The string is encoded in the encoding that `guess_fs_encoding` picks.
    """
    fs_encoding = guess_fs_encoding()
    # The special 'surrogateescape' error handler is there for strings that
    # contain surrogates, which break a plain encoding step.
    return path.encode(fs_encoding, 'surrogateescape')


def read_text_file(path, encoding='utf-8'):
    """Return the full contents of a text file, or `None` if it is missing.

    :param path: Path to the file, as a unicode string.
    :param encoding: Encoding in which to decode the file's contents.
    :return: The file's text, or `None` if the file does not exist.
    :raise IOError: On any I/O failure other than the file not existing.
    """
    assert isinstance(path, type(''))
    try:
        with codecs.open(encode_path(path), encoding=encoding) as text_file:
            contents = text_file.read()
    except IOError as error:
        # Only "no such file" is an expected outcome; anything else is a real
        # error and goes up to the caller.
        if error.errno != ENOENT:
            raise
        return None
    return contents


def read_lines(path, encoding='utf-8'):
    """Return the lines of a text file as a list.

    :param path: Path to the file, as a unicode string.
    :param encoding: Encoding in which to decode the file's contents.
    :return: List of the lines that iterating over the opened file produces.
    """
    assert isinstance(path, type(''))
    encoded_path = encode_path(path)
    with codecs.open(encoded_path, encoding=encoding) as text_file:
        lines = [line for line in text_file]
    return lines


def read_configitems(filename):
    """Read the configuration-items database.

    Each line is split on whitespace into its fields.  Blank lines have no
    fields and are skipped, so that callers which index into or unpack an
    entry do not trip over an empty one.

    :param filename: Path to the configitems file.
    :return: List of entries, one per non-blank line of the file.  Each entry
        is the list of whitespace-separated fields found on that line.
    """
    entries = (line.split() for line in read_lines(filename))
    return [fields for fields in entries if fields]


def map_configitems(items):
    """Index config items by name.

    :param items: Sequence of config items, each of the form
        (name, publication, factor).
    :return: Dict mapping each item's name to its (publication, factor) tuple.
        If a name occurs more than once, the last occurrence wins.
    """
    mapping = {}
    for name, publication, factor in items:
        mapping[name] = (publication, factor)
    return mapping


def read_header(source_tree, filename):
    """Read the lines of a header file located inside a directory tree.

    :param source_tree: Directory that `filename` is relative to, as a
        unicode string.
    :param filename: Path to the header (config.h), relative to
        `source_tree`, as a unicode string.
    :return: List of text lines from the header.
    """
    assert isinstance(source_tree, type(''))
    assert isinstance(filename, type(''))
    header_path = os.path.join(source_tree, filename)
    return read_lines(header_path)


def extract_macro_name(config_line):
    """Extract a cpp macro name from a configuration line.

    :param config_line: Text line from config.h which may define a macro.
    :return: Name of macro defined in `config_line` if it is a `#define`
        statement, or None.  The name is everything after `#define` up to the
        next whitespace.
    """
    # The pattern must be a raw string: "\s" in a regular string literal is an
    # invalid escape sequence, which newer Python versions warn about.
    match = re.match(r'\s*#\s*define\s+([^\s]+)', config_line.strip())
    if match is None:
        return None
    return match.group(1)


def extract_section(header_lines, items, publication, factor):
    """Select the header lines which define macros of one particular section.

    :param header_lines: Sequence of header lines from config.h.
    :param items: Dict mapping macro names to (publication, factor).
    :param publication: Select only macros with this publication tag.
    :param factor: Select only macros with this environmental factor.
    :return: Sorted list of the lines from `header_lines`, stripped of
        surrounding whitespace, which define a macro that `items` maps to
        exactly (`publication`, `factor`).
    """
    wanted = (publication, factor)
    selected = []
    for line in header_lines:
        # For a line that defines nothing, the macro name is None.  The
        # lookup then normally comes up empty, and so the line is left out.
        macro = extract_macro_name(line)
        if items.get(macro) == wanted:
            selected.append(line.strip())
    selected.sort()
    return selected


def compose_header(lines, publication, factor):
    """Compose the text of a header consisting of the given lines.

    :param lines: List of text lines to put in the header.
    :param publication: Publication tag to mention in the intro comment.
    :param factor: Environmental factor to mention in the intro comment.
    :return: Header text: an intro comment, an empty line, and then `lines`,
        each terminated by a newline.
    """
    template = "/* Automatically generated from config.h: %s/%s config. */"
    intro = template % (publication, factor)
    parts = [intro, '']
    parts = parts + lines
    # A final empty element makes the joined text end in a newline.
    parts.append('')
    return '\n'.join(parts)


def generate_config(source_tree, header_lines, items, publication, factor):
    """Generate config file for a given section, if appropriate.

    The file is `include/pqxx/config-<publication>-<factor>.h` underneath
    `source_tree`.  Writes nothing if the configuration file ends up identical
    to one that's already there.  Either way, prints a line to standard output
    saying what happened.

    :param source_tree: Directory under which to write the header, as a
        unicode string.
    :param header_lines: Sequence of header lines from config.h.
    :param items: Dict mapping macro names to (publication, factor).
    :param publication: Extract only macros for this publication tag.
    :param factor: Extract only macros for this environmental factor.
    """
    assert isinstance(source_tree, type(''))
    config_file = os.path.join(
        source_tree, 'include', 'pqxx',
        'config-%s-%s.h' % (publication, factor))
    # Make the path safe to print: round-trip it through the output encoding,
    # replacing any characters which that encoding can't represent.  Decoding
    # back to text matters: formatting the encoded bytes into the message
    # would print them as a bytes literal (b'...') on Python 3.
    output_encoding = guess_output_encoding()
    printable_path = config_file.encode(
        output_encoding, 'replace').decode(output_encoding)
    section = extract_section(header_lines, items, publication, factor)
    contents = compose_header(section, publication, factor)
    if read_text_file(config_file) == contents:
        print("Generating %s: no changes--skipping." % printable_path)
        return

    print("Generating %s: %d item(s)." % (printable_path, len(section)))
    path = encode_path(config_file)
    # Encoding as ASCII means that any non-ASCII character in the contents
    # raises an error here.
    with codecs.open(path, 'wb', encoding='ascii') as header:
        header.write(contents)


def parse_args():
    """Parse command-line arguments.

    :return: Parsed arguments.  Attribute `sourcetree` holds the location of
        the source tree: the path given on the command line, or if none was
        given, the parent of the directory containing this script.
    """
    default_source_tree = os.path.dirname(
        os.path.dirname(os.path.normpath(os.path.abspath(__file__))))
    parser = ArgumentParser(description=__doc__)
    # A positional argument is mandatory, and its default ignored, unless
    # nargs says that it may be omitted.
    parser.add_argument(
        'sourcetree', metavar='PATH', nargs='?', default=default_source_tree,
        help="Location of libpqxx source tree.  Defaults to '%(default)s'.")
    return parser.parse_args()


def check_args(args):
    """Validate command-line arguments.

    :param args: Parsed arguments; its `sourcetree` must be a directory.
    :raise Exception: If `args.sourcetree` is not an existing directory.
    """
    source_tree = args.sourcetree
    if os.path.isdir(source_tree):
        return
    raise Exception("Not a directory: '%s'." % source_tree)


def get_current_dir():
    """Return the current working directory as a unicode string.

    If `getcwd` reports the directory as bytes, it gets decoded first.
    """
    here = getcwd()
    return decode_path(here) if isinstance(here, bytes) else here


def main():
    """Main program entry point.

    Reads the configitems file from the source tree and config.h from the
    current directory, then generates a config header for every combination
    of publication and factor which occurs in configitems.
    """
    args = parse_args()
    check_args(args)
    # configitems is under revision control, so it lives in the source tree.
    configitems_path = os.path.join(args.sourcetree, 'configitems')
    items = read_configitems(configitems_path)
    publications = sorted({entry[1] for entry in items})
    factors = sorted({entry[2] for entry in items})
    # config.h is a generated file, so it lives in the build tree.  That
    # should be the directory we are in.
    build_dir = get_current_dir()
    header_path = os.path.join('include', 'pqxx', 'config.h')
    original_header = read_header(build_dir, header_path)
    items_map = map_configitems(items)

    for publication in publications:
        for factor in factors:
            generate_config(
                build_dir, original_header, items_map, publication, factor)


# Run the program only when executed as a script, not when imported.
if __name__ == '__main__':
    main()