%------------------------------------------------------------------------------%
% vim: ts=4 sw=4 et ft=mercury
%------------------------------------------------------------------------------%
% Copyright (C) 2020 The Mercury team.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%------------------------------------------------------------------------------%
%
% Author: Zoltan Somogyi
%
% split_file: a tool that helps in splitting up a file into two or more pieces.
%
% This program takes one input file and the names of two or more new files
% to be created. It considers the input file to consist of a sequence of lines,
% some of which are control lines, and the rest are data lines. What
% distinguishes them is that control lines start with the keyword SPLIT
% and a space in the first six columns. After the keyword, there should be a
% space-separated list of one or more integers in the range 1 to N,
% where N is the number of output files. This tells split_file that
% the data lines between this control line and the next control line
% should be put into the output file(s) whose number(s) appear on the
% control line. The data lines before the first control line go into
% the first output file.
%
% For example,
%
%   start
%   SPLIT 1
%   123
%   abc
%   SPLIT 1 2
%   def
%   SPLIT 2
%   ghi
%
% will cause split_file to put
%
%   start
%   123
%   abc
%   def
%
% into the first output file and
%
%   def
%   ghi
%
% into the second output file.

:- module split_file.

:- interface.

:- import_module io.

:- pred main(io::di, io::uo) is det.

%------------------------------------------------------------------------------%

:- implementation.

:- import_module assoc_list.
:- import_module char.
:- import_module cord.
:- import_module int.
:- import_module list.
:- import_module map.
:- import_module pair.
:- import_module string.

%------------------------------------------------------------------------------%

    % File names are represented as plain strings.
:- type file_name == string.

    % Everything we know about one output file: where it will be written,
    % and the data lines accumulated for it so far. Lines are appended
    % to the cord as the input is scanned, and are written out only
    % after the whole input has been processed.
:- type output_file
    --->    output_file(
                % The name of the output file.
                file_name,

                % The lines being queued up to be printed out to that file.
                cord(string)
            ).

    % Maps each output file number to the information about that file.
    % make_output_map numbers the files consecutively, in the order
    % in which their names are given to it.
:- type output_map == map(int, output_file).

main(!IO) :-
    io.command_line_arguments(Args, !IO),
    io.stderr_stream(StdErrStream, !IO),
    ( if
        Args = [InputFileName | OutputFileNames],
        list.length(OutputFileNames) > 1
    then
        io.open_input(InputFileName, OpenInputResult, !IO),
        (
            OpenInputResult = ok(InputStream),
            io.read_file_as_string(InputStream, ReadInputResult, !IO),
            (
                ReadInputResult = ok(InputFileString),
                make_output_map(OutputFileNames, 1, map.init, OutputMap0),
                InputLines = string.split_at_char('\n', InputFileString),
                split_file(InputLines, InputFileName, 1, [1],
                    OutputMap0, OutputMap, cord.init, ErrorsCord),
                Errors = cord.list(ErrorsCord),
                (
                    Errors = [],
                    map.to_assoc_list(OutputMap, OutputAssocList),
                    output_split_files(StdErrStream, OutputAssocList, !IO)
                ;
                    Errors = [_ | _],
                    list.foldl(io.write_string(StdErrStream), Errors, !IO)
                )
            ;
                ReadInputResult = error(_, ReadInputError),
                io.error_message(ReadInputError, ReadInputErrorMsg),
                io.format(StdErrStream, "error reading %s: %s\n",
                    [s(InputFileName), s(ReadInputErrorMsg)], !IO),
                io.set_exit_status(1, !IO)
            )
        ;
            OpenInputResult = error(OpenInputError),
            io.error_message(OpenInputError, OpenInputErrorMsg),
            io.format(StdErrStream, "error opening %s for reading: %s\n",
                [s(InputFileName), s(OpenInputErrorMsg)], !IO),
            io.set_exit_status(1, !IO)
        )
    else
        io.write_string(StdErrStream, "usage: split_file input_file " ++
            "output_file1 output_file2 [...]\n", !IO),
        io.set_exit_status(1, !IO)
    ).

%------------------------------------------------------------------------------%

:- pred make_output_map(list(file_name)::in, int::in,
    output_map::in, output_map::out) is det.

make_output_map([], _, !OutputMap).
make_output_map([OutputFileName | OutputFileNames], FileNumber, !OutputMap) :-
    OutputFile = output_file(OutputFileName, cord.init),
    map.det_insert(FileNumber, OutputFile, !OutputMap),
    make_output_map(OutputFileNames, FileNumber + 1, !OutputMap).

%------------------------------------------------------------------------------%

:- pred split_file(list(string)::in, file_name::in, int::in,
    list(int)::in, output_map::in, output_map::out,
    cord(string)::in, cord(string)::out) is det.

split_file([], _, _, _, !OutputMap, !Errors).
split_file([Line | Lines], FileName, LineNumber, !.CurOutputs,
        !OutputMap, !Errors) :-
    ( if Line = "", Lines = [] then
        % When we split up the contents of the input file with split_at_char,
        % the char being a newline, split_at_char will return a string
        % *after* the last newline, even if it is empty. We could delete it
        % immediately after the call to split_at_char, but that would
        % add an extra traversal of the line list. Instead, we ignore it here.
        true
    else if string.remove_prefix("SPLIT ", Line, RestOfCtrlLine) then
        RestOfCtrlLineWords =
            string.split_at_separator(char.is_whitespace, RestOfCtrlLine),
        parse_control_line_numbers(RestOfCtrlLineWords,
            FileName, LineNumber, !.OutputMap, [], !:CurOutputs, !Errors)
    else
        record_data_line(Line, !.CurOutputs, !OutputMap)
    ),
    split_file(Lines, FileName, LineNumber + 1, !.CurOutputs,
        !OutputMap, !Errors).

:- pred parse_control_line_numbers(list(string)::in,
    file_name::in, int::in, output_map::in,
    list(int)::in, list(int)::out, cord(string)::in, cord(string)::out) is det.

parse_control_line_numbers([], _, _, _, !Outputs, !Errors).
parse_control_line_numbers([Word | Words], FileName, LineNumber,
        OutputMap, !Outputs, !Errors) :-
    ( if string.to_int(Word, Num) then
        ( if map.search(OutputMap, Num, _) then
            % The order of the numbers does not matter.
            !:Outputs = [Num | !.Outputs]
        else
            string.format("%s:%d: %s is not a valid output file number\n",
                [s(FileName), i(LineNumber), s(Word)], Error),
            cord.snoc(Error, !Errors)
        )
    else
        string.format("%s:%d: %s is not a number\n",
            [s(FileName), i(LineNumber), s(Word)], Error),
        cord.snoc(Error, !Errors)
    ),
    parse_control_line_numbers(Words, FileName, LineNumber,
        OutputMap, !Outputs, !Errors).

:- pred record_data_line(string::in, list(int)::in,
    output_map::in, output_map::out) is det.

record_data_line(_, [], !OutputMap).
record_data_line(DataLine, [Output | Outputs], !OutputMap) :-
    map.lookup(!.OutputMap, Output, OutputFile0),
    OutputFile0 = output_file(OutputFileName, DataLinesCord0),
    cord.snoc(DataLine, DataLinesCord0, DataLinesCord),
    OutputFile = output_file(OutputFileName, DataLinesCord),
    map.det_update(Output, OutputFile, !OutputMap),
    record_data_line(DataLine, Outputs, !OutputMap).

%------------------------------------------------------------------------------%

:- pred output_split_files(io.text_output_stream::in,
    assoc_list(int, output_file)::in, io::di, io::uo) is det.

output_split_files(_, [], !IO).
output_split_files(StdErrStream, [_Output - OutputFile | OutputFiles], !IO) :-
    OutputFile = output_file(OutputFileName, DataLinesCord),
    io.open_output(OutputFileName, OpenOutputResult, !IO),
    (
        OpenOutputResult = ok(OutputStream),
        DataLines = cord.list(DataLinesCord),
        list.foldl(write_line_and_nl(OutputStream), DataLines, !IO),
        io.close_output(OutputStream, !IO)
    ;
        OpenOutputResult = error(OpenOutputError),
        io.error_message(OpenOutputError, OpenOutputErrorMsg),
        io.format(StdErrStream, "error opening %s for writing: %s\n",
            [s(OutputFileName), s(OpenOutputErrorMsg)], !IO),
        io.set_exit_status(1, !IO)
    ),
    output_split_files(StdErrStream, OutputFiles, !IO).

:- pred write_line_and_nl(io.text_output_stream::in, string::in,
    io::di, io::uo) is det.

write_line_and_nl(OutputStream, Line, !IO) :-
    io.format(OutputStream, "%s\n", [s(Line)], !IO).

%------------------------------------------------------------------------------%