#include "winnowing.h"

#include <cstddef>
#include <utility>

std::vector <Code> getCodes(std::vector <std::string> &codeFiles, int k, int w)
{
    std::vector <Code> codes;

    for (const auto &codeFile: codeFiles)
    {
        initializeSets();
        Code *code = new Code;
        code->filePath = codeFile;
        qDebug() << codeFile.c_str();
        getCodeSkeleton(*code);
        qDebug() << "getCodeSkeleton";

        if (code->skeleton.size() >= k)
        {
            getFingerprints(*code, k, w);
            qDebug() << "getFingerprints";
            setFileName(*code);
            codes.push_back(*code);
        }
    }

    return codes;
}

std::vector<Code> getDocs(std::vector<std::string> &docFiles, int k, int w)
{
    std::vector <Code> codes;

    for (const auto &codeFile: docFiles)
    {
        Code *code = new Code;
        code->filePath = codeFile;
        qDebug() << code->filePath.c_str();
        code->pureCode = code->skeleton = readFile(code->filePath);
        qDebug() << "readFile";

        if (code->skeleton.size() >= k)
        {
            getFingerprints(*code, k, w);
            qDebug() << "getFingerprints";
            setFileName(*code);
            codes.push_back(*code);
        }
    }

    return codes;
}

// Reads the file named by `path` and returns its contents with all whitespace
// removed (the whitespace-separated tokens are concatenated back to back).
//
// The first and last characters of `path` are dropped before the file is
// opened -- presumably the path arrives wrapped in quotes; confirm against the
// callers. If the file cannot be opened, a message is logged and the process
// terminates with exit status 1.
std::string readFile(const std::string &path)
{
    const std::string trimmedPath = path.substr(1, path.size()-2);
    std::ifstream input(trimmedPath);

    if (input.fail())
    {
        qDebug() << "Unable to open file";
        exit(1);
    }

    std::string contents;

    // operator>> skips whitespace, so appending token by token strips it.
    for (std::string token; input >> token; )
        contents += token;

    // The stream is closed by its destructor when the function returns.
    return contents;
}

// Fills code.pureCode and code.skeleton from the file at code.filePath.
//
// The text is passed through removeComments(), removeSpaces() and
// removeQuotes() in that order; the result is stored as pureCode, and the
// output of transformCode() on that same text is stored as skeleton.
void getCodeSkeleton(Code &code)
{
    // Step 1: load the file with its comments removed.
    std::string cleaned = removeComments(code.filePath);

    // Step 2: drop the whitespace.
    cleaned = removeSpaces(cleaned);

    // Step 3: removeQuotes() is called only for its effect on `cleaned`.
    removeQuotes(cleaned);

    // pureCode is captured before the transformation step runs.
    code.pureCode = cleaned;
    code.skeleton = transformCode(cleaned);
}

// Selects the fingerprints of code.skeleton and records them in
// code.fingerprints (hash -> number of times selected) and
// code.numOfSelectedFingerPrints.
//
// code: record whose skeleton is hashed and whose fingerprint data is updated.
// k:    k-gram length passed to getFactor() and karpRabinHashing().
// w:    winnowing window size, in hashes. If fewer than w hashes exist, the
//       whole hash sequence is treated as a single window. Nothing is selected
//       when w <= 0 or when there are no hashes at all.
void getFingerprints(Code &code, int k, int w)
{
    // Applying Karp-Rabin algorithm:
    long long factor = getFactor(k);
    const std::vector <long long> hashKeys = karpRabinHashing(code.skeleton, k, factor);

    if (w <= 0 || hashKeys.empty())
        return;

    // Applying Winnowing algorithm:
    // All index arithmetic is signed. The former bound `hashKeys.size()-w+1`
    // was evaluated in unsigned arithmetic and wrapped around to a huge value
    // for short inputs, which made the loop read past the end of hashKeys.
    const auto first = hashKeys.begin();
    const std::ptrdiff_t total = hashKeys.end() - first;
    const std::ptrdiff_t window = std::min<std::ptrdiff_t>(w, total);

    std::ptrdiff_t min = -1; // index of minimum hash; -1 until the first window is scanned

    for (std::ptrdiff_t i = 0; i + window <= total; ++i)
    {
        const std::ptrdiff_t newest = i + window - 1; // rightmost index of the current window

        if (min < i) // the previous minimum hash key is no longer in the window
            min = std::min_element(first + i, first + i + window) - first;
        else if (first[newest] <= first[min]) // the newly entered hash is the new minimum
            min = newest;
        else // the previous minimum is still the minimum: nothing new to record
            continue;

        ++code.fingerprints[first[min]];
        ++code.numOfSelectedFingerPrints;
    }
}

// Counts how many selected fingerprints code1 and code2 have in common.
//
// For every hash present in both fingerprint tables, the smaller of the two
// occurrence counts is added to the total.
//
// Returns the accumulated number of matches.
int compareCodes(Code &code1, Code &code2)
{
    int shared = 0;
    const auto absent = code2.fingerprints.end();

    for (auto entry = code1.fingerprints.begin(); entry != code1.fingerprints.end(); ++entry)
    {
        const auto counterpart = code2.fingerprints.find(entry->first);

        // Hashes that occur in code1 only contribute nothing.
        if (counterpart == absent)
            continue;

        shared += std::min(entry->second, counterpart->second);
    }

    return shared;
}

// Derives code.fileName from code.filePath.
//
// The name is the text after the last '/' in the path, minus the path's final
// character (presumably a closing quote -- confirm against the callers). When
// the path contains no '/', fileName is left untouched.
void setFileName(Code &code)
{
    const std::string &path = code.filePath;
    const auto slash = path.find_last_of('/');

    if (slash == std::string::npos)
        return;

    // The count excludes the last character of the path. If '/' is itself the
    // last character, the start position equals the path length and substr
    // yields an empty name.
    code.fileName = path.substr(slash+1, path.size()-slash-2);
}