#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <locale.h>

/* Size in bytes of the chunk buffer used when reading the normalized word file. */
#define READ_BUF_SIZE 4096
/* Size in bytes of the per-word accumulation buffer; the longest word accepted
 * is LINE_BUF_SIZE - 1 bytes. */
#define LINE_BUF_SIZE 4096

/*
 * Write exactly `len` bytes from `buf` to `fd`.
 *
 * Short writes are resumed from where they stopped and writes interrupted by
 * a signal (EINTR) are retried. Any other write error terminates the process
 * immediately with _exit(1); the function never reports failure to its caller.
 */
static void write_all(int fd, const char *buf, size_t len)
{
    const char *cursor = buf;
    size_t remaining = len;

    for (;;) {
        if (remaining == 0) {
            return;
        }

        ssize_t written = write(fd, cursor, remaining);
        if (written >= 0) {
            cursor += written;
            remaining -= (size_t)written;
            continue;
        }

        /* Only an interrupted call is worth retrying. */
        if (errno != EINTR) {
            _exit(1);
        }
    }
}

/*
 * Write the NUL-terminated string `s` (without its terminator) to `fd`
 * through write_all().
 */
static void write_str(int fd, const char *s)
{
    const size_t length = strlen(s);

    write_all(fd, s, length);
}

/*
 * Print "<prefix>: <strerror(errno)>\n" to standard error.
 *
 * The message is formatted into a fixed 512-byte buffer. snprintf() returns
 * the length the full message WOULD have had, so the value is clamped to what
 * actually fits before it is used as a write length; an over-long message is
 * cut short but still ends with a newline.
 */
static void log_errno_msg(const char *prefix)
{
    char buf[512];
    size_t len;
    int n = snprintf(buf, sizeof(buf), "%s: %s\n", prefix, strerror(errno));

    if (n <= 0) {
        return;
    }

    len = (size_t)n;
    if (len >= sizeof(buf)) {
        /* Truncated: only sizeof(buf) - 1 bytes are valid in buf. */
        len = sizeof(buf) - 1;
        buf[len - 1] = '\n';
    }

    write_all(STDERR_FILENO, buf, len);
}

/*
 * Print the command-line usage line to standard error.
 *
 * progname: program name to show (normally argv[0]). A NULL pointer, which
 *           is what argv[0] is when the program is started with argc == 0,
 *           is replaced by a placeholder instead of being passed to "%s".
 *
 * snprintf() returns the untruncated length, so the value is clamped to the
 * buffer before being used as a write length; a very long program name
 * produces a shortened line that still ends with a newline.
 */
static void log_usage(const char *progname)
{
    char buf[512];
    size_t len;
    int n;

    if (progname == NULL) {
        progname = "programa";
    }

    n = snprintf(buf, sizeof(buf),
                 "Uso: %s <multi_map:0|1> <tmp_dir> <input1> [input2 ...]\n",
                 progname);
    if (n <= 0) {
        return;
    }

    len = (size_t)n;
    if (len >= sizeof(buf)) {
        /* Truncated: only sizeof(buf) - 1 bytes are valid in buf. */
        len = sizeof(buf) - 1;
        buf[len - 1] = '\n';
    }

    write_all(STDERR_FILENO, buf, len);
}

/*
 * Wait for child process `pid` to terminate.
 *
 * waitpid() calls interrupted by a signal are retried.
 *
 * Returns 0 only when the child exited normally with status 0.
 * Returns -1 when waitpid() fails (errno left as set by waitpid) or when the
 * child exited with a non-zero status or did not exit normally (errno is set
 * to ECHILD in that case).
 */
static int wait_child(pid_t pid)
{
    int status = 0;
    pid_t reaped;

    do {
        reaped = waitpid(pid, &status, 0);
    } while (reaped < 0 && errno == EINTR);

    if (reaped < 0) {
        return -1;
    }

    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
        errno = ECHILD;
        return -1;
    }

    return 0;
}

/*
 * Make sure `path` names a directory, creating it with mode 0755 if needed.
 *
 * Returns 0 when the directory exists on return.
 * Returns -1 with errno set otherwise; errno is ENOTDIR when `path` exists
 * but is not a directory.
 *
 * The stat()-then-mkdir() sequence is inherently racy: another process may
 * create the directory between the two calls. mkdir() then fails with EEXIST
 * although the goal has been reached, so that case is re-checked instead of
 * being reported as an error.
 */
static int ensure_dir_exists(const char *path)
{
    struct stat st;

    if (stat(path, &st) < 0) {
        if (mkdir(path, 0755) == 0) {
            return 0;
        }
        if (errno != EEXIST) {
            return -1;
        }
        /* Something appeared at `path` after the first stat(); look again. */
        if (stat(path, &st) < 0) {
            return -1;
        }
    }

    if (S_ISDIR(st.st_mode)) {
        return 0;
    }

    errno = ENOTDIR;
    return -1;
}

/*
 * Format "<dir>/<prefix><index>.tmp" into `out`.
 *
 * out, out_size: destination buffer and its capacity in bytes.
 *
 * The result is silently truncated (but still NUL-terminated when
 * out_size > 0) if it does not fit; nothing is reported to the caller.
 */
static void build_path(char *out, size_t out_size,
                       const char *dir, const char *prefix, int index)
{
    int produced = snprintf(out, out_size, "%s/%s%d.tmp", dir, prefix, index);

    (void)produced; /* this helper has no way to report truncation */
}

/*
 * Reap child `pid`, retrying waitpid() when interrupted by a signal.
 *
 * Returns the child's exit status (0..255) when it exited normally.
 * Returns -1 when waitpid() fails (errno from waitpid) or when the child did
 * not exit normally (errno set to ECHILD).
 */
static int reap_exit_status(pid_t pid)
{
    int status = 0;

    while (waitpid(pid, &status, 0) < 0) {
        if (errno != EINTR) {
            return -1;
        }
    }

    if (!WIFEXITED(status)) {
        errno = ECHILD;
        return -1;
    }

    return WEXITSTATUS(status);
}

/*
 * Run the pipeline
 *
 *     grep -oE -e '[[:alpha:]]+' -- <input_file> | tr '[:upper:]' '[:lower:]'
 *
 * with tr's output redirected to `normalized_file` (created or truncated,
 * mode 0644). The result holds one lower-cased alphabetic word per line.
 *
 * Returns 0 on success, -1 on failure with errno set. When a child process
 * is the cause of the failure (bad exit status or abnormal termination)
 * errno is ECHILD.
 *
 * Notes:
 *  - grep exits with status 1 when nothing matched. An input without any
 *    alphabetic word is valid and simply yields an empty output file, so
 *    status 1 is accepted; status 2 and above (e.g. unreadable input) and
 *    127 (exec failure in the child) are errors.
 *  - "-e" and "--" keep a pattern or file name that starts with '-' from
 *    being parsed as a grep option.
 *  - Every child that was forked is reaped on every return path, so no
 *    zombie is left behind.
 */
static int extract_normalized_words(const char *input_file, const char *normalized_file)
{
    int pipefd[2];
    pid_t grep_pid, tr_pid;
    int grep_rc, tr_rc;
    int saved_errno;

    if (pipe(pipefd) < 0) {
        return -1;
    }

    grep_pid = fork();
    if (grep_pid < 0) {
        saved_errno = errno;
        close(pipefd[0]);
        close(pipefd[1]);
        errno = saved_errno;
        return -1;
    }

    if (grep_pid == 0) {
        /* Producer: stdout becomes the write end of the pipe. */
        if (dup2(pipefd[1], STDOUT_FILENO) < 0) {
            _exit(127);
        }
        close(pipefd[0]);
        close(pipefd[1]);

        execlp("grep", "grep", "-oE", "-e", "[[:alpha:]]+", "--",
               input_file, (char *)NULL);
        _exit(127);
    }

    tr_pid = fork();
    if (tr_pid < 0) {
        saved_errno = errno;
        /* With both ends closed here grep sees a reader-less pipe and
         * terminates, so waiting for it cannot block indefinitely. */
        close(pipefd[0]);
        close(pipefd[1]);
        (void)reap_exit_status(grep_pid);
        errno = saved_errno;
        return -1;
    }

    if (tr_pid == 0) {
        /* Consumer: stdin is the read end of the pipe, stdout the file. */
        int outfd = open(normalized_file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (outfd < 0) {
            _exit(127);
        }

        if (dup2(pipefd[0], STDIN_FILENO) < 0) {
            _exit(127);
        }

        if (dup2(outfd, STDOUT_FILENO) < 0) {
            _exit(127);
        }

        close(outfd);
        close(pipefd[0]);
        close(pipefd[1]);

        execlp("tr", "tr", "[:upper:]", "[:lower:]", (char *)NULL);
        _exit(127);
    }

    /* The parent must drop its copies or tr would never see end-of-file. */
    close(pipefd[0]);
    close(pipefd[1]);

    /* Always reap both children before deciding on the outcome. */
    grep_rc = reap_exit_status(grep_pid);
    saved_errno = errno;
    tr_rc = reap_exit_status(tr_pid);

    if (grep_rc < 0) {
        errno = saved_errno;
        return -1;
    }
    if (tr_rc < 0) {
        return -1;
    }

    if (grep_rc > 1 || tr_rc != 0) {
        errno = ECHILD;
        return -1;
    }

    return 0;
}

/*
 * Turn a file of newline-separated words into map records.
 *
 * For every non-empty line "<word>" read from `normalized_file`, the line
 * "<word> 1" is written to `map_file`, and *word_count is incremented.
 * A final line without a trailing newline is handled like any other line;
 * empty lines are skipped.
 *
 * normalized_file: input path, opened read-only.
 * map_file:        output path, created with mode 0644 if missing.
 * append_mode:     non-zero appends to map_file, zero truncates it first.
 * word_count:      running total, incremented once per record written.
 *
 * Returns 0 on success, -1 on failure with errno set. A word of
 * LINE_BUF_SIZE bytes or more is rejected with errno = ENOMEM; records
 * written before the failure stay in map_file and stay counted.
 * Write errors are not returned: write_all() is used for output.
 */
static int emit_map_file(const char *normalized_file,
                         const char *map_file,
                         int append_mode,
                         long *word_count)
{
    static const char pair_suffix[] = " 1\n";
    enum { SUFFIX_LEN = sizeof(pair_suffix) - 1 };

    int infd, outfd;
    int rc = -1;
    int saved_errno;
    int out_flags = O_WRONLY | O_CREAT | (append_mode ? O_APPEND : O_TRUNC);
    char rbuf[READ_BUF_SIZE];
    /* Extra room so the suffix can be appended in place and each record
     * goes out in a single write_all() call. */
    char line[LINE_BUF_SIZE + SUFFIX_LEN];
    size_t line_len = 0;

    infd = open(normalized_file, O_RDONLY);
    if (infd < 0) {
        return -1;
    }

    outfd = open(map_file, out_flags, 0644);
    if (outfd < 0) {
        saved_errno = errno;
        close(infd);
        errno = saved_errno;
        return -1;
    }

    for (;;) {
        ssize_t nread = read(infd, rbuf, sizeof(rbuf));
        ssize_t i;

        if (nread < 0) {
            if (errno == EINTR) {
                continue; /* interrupted before any data was read: retry */
            }
            goto done;
        }
        if (nread == 0) {
            break;
        }

        for (i = 0; i < nread; ++i) {
            char c = rbuf[i];

            if (c != '\n') {
                /* Word length limit is LINE_BUF_SIZE - 1 bytes. */
                if (line_len + 1 >= LINE_BUF_SIZE) {
                    errno = ENOMEM;
                    goto done;
                }
                line[line_len++] = c;
            } else if (line_len > 0) {
                memcpy(line + line_len, pair_suffix, SUFFIX_LEN);
                write_all(outfd, line, line_len + SUFFIX_LEN);
                (*word_count)++;
                line_len = 0;
            }
        }
    }

    /* Last word when the input does not end with a newline. */
    if (line_len > 0) {
        memcpy(line + line_len, pair_suffix, SUFFIX_LEN);
        write_all(outfd, line, line_len + SUFFIX_LEN);
        (*word_count)++;
    }

    rc = 0;

done:
    /* Keep the errno of the first failure; close() must not overwrite it. */
    saved_errno = errno;
    if (close(infd) < 0 && rc == 0) {
        rc = -1;
        saved_errno = errno;
    }
    if (close(outfd) < 0 && rc == 0) {
        rc = -1;
        saved_errno = errno;
    }
    if (rc < 0) {
        errno = saved_errno;
    }
    return rc;
}

/*
 * Map stage entry point.
 *
 * Usage: <prog> <multi_map:0|1> <tmp_dir> <input1> [input2 ...]
 *
 * For each input file i (0-based):
 *   1. its words are extracted and lower-cased into <tmp_dir>/_norm_<i>.tmp;
 *   2. one "<word> 1" record per word is written to a map file;
 *   3. the normalized temp file is removed.
 *
 * multi_map == 0: all records go to <tmp_dir>/_map_0.tmp (truncated for the
 *                 first input, appended to for the following ones).
 * multi_map == 1: input i gets its own <tmp_dir>/_map_<i>.tmp.
 *
 * A summary line with the total word count is printed to standard output.
 *
 * Exit status: 1 on any error; otherwise the total word count modulo 256.
 * NOTE(review): a successful run therefore usually exits non-zero, and a
 * count congruent to 1 is indistinguishable from an error. Presumably a
 * parent process consumes this value - confirm before changing it.
 */
int main(int argc, char *argv[])
{
    /* Longest tail build_path() appends to tmp_dir: "/" + "_norm_" + an int
     * rendered in at most 11 characters + ".tmp", plus the NUL terminator. */
    enum { PATH_TAIL_MAX = 1 + 6 + 11 + 4 + 1 };

    int multi_map;
    long parsed_flag;
    char *flag_end = NULL;
    const char *tmp_dir;
    int n_inputs;
    int i;
    long total_words = 0;

    char normalized_file[512];
    char map_file[512];

    setlocale(LC_ALL, "");

    if (argc < 4) {
        log_usage(argv[0]);
        return 1;
    }

    /* strtol with full-string validation: atoi would accept "abc" as 0 and
     * "1x" as 1. */
    parsed_flag = strtol(argv[1], &flag_end, 10);
    if (flag_end == argv[1] || *flag_end != '\0' ||
        (parsed_flag != 0 && parsed_flag != 1)) {
        write_str(STDERR_FILENO, "Error: multi_map debe ser 0 o 1.\n");
        return 1;
    }
    multi_map = (int)parsed_flag;

    tmp_dir = argv[2];
    n_inputs = argc - 3;

    /* build_path() truncates silently, which would make the program work on
     * the wrong file names; refuse a directory that cannot fit instead. */
    if (strlen(tmp_dir) > sizeof(normalized_file) - PATH_TAIL_MAX ||
        strlen(tmp_dir) > sizeof(map_file) - PATH_TAIL_MAX) {
        errno = ENAMETOOLONG;
        log_errno_msg("tmp_dir");
        return 1;
    }

    if (ensure_dir_exists(tmp_dir) < 0) {
        log_errno_msg("mkdir/stat tmp_dir");
        return 1;
    }

    if (multi_map == 0) {
        /* Single shared map file for every input. */
        build_path(map_file, sizeof(map_file), tmp_dir, "_map_", 0);
    }

    for (i = 0; i < n_inputs; ++i) {
        const char *input_file = argv[i + 3];

        build_path(normalized_file, sizeof(normalized_file), tmp_dir, "_norm_", i);

        if (extract_normalized_words(input_file, normalized_file) < 0) {
            log_errno_msg("extract_normalized_words");
            /* The file may already have been created; do not leave it behind. */
            unlink(normalized_file);
            return 1;
        }

        if (multi_map == 1) {
            build_path(map_file, sizeof(map_file), tmp_dir, "_map_", i);
        }

        /* Append only when sharing one map file and it already holds the
         * records of an earlier input. */
        if (emit_map_file(normalized_file,
                          map_file,
                          (multi_map == 0 && i > 0) ? 1 : 0,
                          &total_words) < 0) {
            log_errno_msg("emit_map_file");
            unlink(normalized_file);
            return 1;
        }

        unlink(normalized_file);
    }

    {
        char buf[256];
        int n = snprintf(buf, sizeof(buf),
                         "[WordCount:map] Processed %ld words.\n",
                         total_words);
        if (n > 0) {
            write_all(STDOUT_FILENO, buf, (size_t)n);
        }
    }

    return (int)(total_words % 256);
}
