#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using namespace std;

/// Vocabulary table: maps each word to its numeric ID.
/// Filled by loadModel() and read by inferNextWord().
std::unordered_map<std::string, std::uint32_t> vocab;

/// Match table: maps a context key (the position-weighted sum of word IDs that
/// inferNextWord() computes) to the list of word IDs stored under that key in
/// the model file. Filled by loadModel().
std::unordered_map<std::uint64_t, std::vector<std::uint32_t>> hasilToOutput;

/// Tokenizes every entry of `theString` in place.
///
/// Each input string is split into maximal runs of alphanumeric characters
/// (lower-cased) and single-character tokens for every non-alphanumeric
/// character. The resulting tokens, in order, replace the vector's contents.
///
/// @param theString  Input strings; overwritten with the token list.
void preprocess(std::vector<std::string> &theString) {
    std::vector<std::string> tokens;
    tokens.reserve(theString.size());

    for (const std::string &raw : theString) {
        std::string run;
        for (const char ch : raw) {
            // <cctype> functions require a value representable as unsigned
            // char; passing a negative char (e.g. a UTF-8 byte) is undefined.
            const auto uch = static_cast<unsigned char>(ch);
            if (std::isalnum(uch)) {
                run += static_cast<char>(std::tolower(uch));
                continue;
            }
            // A non-alphanumeric character ends the current run (if any) and
            // becomes a token of its own.
            if (!run.empty()) {
                tokens.push_back(std::move(run));
                run.clear();
            }
            tokens.push_back(std::string(1, ch));
        }
        if (!run.empty()) {
            tokens.push_back(std::move(run));
        }
    }

    theString = std::move(tokens);
}

| | void loadModel(const string &filename) { |
| | ifstream file(filename); |
| | if (!file) { |
| | cerr << "Gagal membuka file model.\n"; |
| | exit(1); |
| | } |
| |
|
| | string line; |
| | bool readingVocab = false; |
| | bool readingMatch = false; |
| |
|
| | while (getline(file, line)) { |
| | if (line == "Vocabs:") { |
| | readingVocab = true; |
| | readingMatch = false; |
| | continue; |
| | } |
| | if (line == "Matchs:") { |
| | readingVocab = false; |
| | readingMatch = true; |
| | continue; |
| | } |
| |
|
| | if (readingVocab) { |
| | size_t pos = line.find(": "); |
| | if (pos != string::npos) { |
| | string word = line.substr(0, pos); |
| | uint32_t id = stoi(line.substr(pos + 2)); |
| | vocab[word] = id; |
| | } |
| | } else if (readingMatch) { |
| | if (line.back() == ':') { |
| | uint64_t key = stoull(line.substr(0, line.size() - 1)); |
| | getline(file, line); |
| | vector<uint32_t> targets; |
| | while (getline(file, line) && line != "]") { |
| | if (!line.empty()) { |
| | targets.push_back(stoi(line)); |
| | } |
| | } |
| | hasilToOutput[key] = targets; |
| | } |
| | } |
| | } |
| |
|
| | file.close(); |
| | } |
| |
|
| | |
| | string inferNextWord(const vector<string> &contextWords) { |
| | uint64_t total = 0; |
| | for (size_t i = 0; i < contextWords.size(); ++i) { |
| | const string &word = contextWords[i]; |
| | if (vocab.count(word)) { |
| | total += vocab[word] * (i + 1); |
| | } else { |
| | return "<unknown word: " + word + ">"; |
| | } |
| | } |
| |
|
| | if (hasilToOutput.count(total) == 0) { |
| | |
| | uint64_t closestKey = 0; |
| | uint64_t minDiff = UINT64_MAX; |
| |
|
| | for (const auto &[key, _] : hasilToOutput) { |
| | uint64_t diff = (key > total) ? key - total : total - key; |
| | if (diff < minDiff) { |
| | minDiff = diff; |
| | closestKey = key; |
| | } |
| | } |
| |
|
| | if (minDiff == UINT64_MAX) |
| | return "<no prediction>"; |
| | total = closestKey; |
| | } |
| | const auto &candidates = hasilToOutput[total]; |
| | |
| | unordered_map<uint32_t, int> freq; |
| | for (auto id : candidates) { |
| | freq[id]++; |
| | } |
| |
|
| | uint32_t predictedID = max_element(freq.begin(), freq.end(), |
| | [](const pair<uint32_t, int> &a, |
| | const pair<uint32_t, int> &b) { |
| | return a.second < b.second; |
| | }) |
| | ->first; |
| |
|
| | |
| | for (const auto &[word, id] : vocab) { |
| | if (id == predictedID) |
| | return word; |
| | } |
| |
|
| | return "<not found>"; |
| | } |
| |
|
| | |
| | int main() { |
| | srand(time(0)); |
| | loadModel("model.txt"); |
| |
|
| | cout << "Masukkan kalimat sebagai konteks:\n"; |
| | vector<string> context; |
| | string word; |
| | string words; |
| | getline(cin, words); |
| | stringstream ss(words); |
| | while (ss >> word) { |
| | context.push_back(word); |
| | } |
| | preprocess(context); |
| | auto newContext = context; |
| |
|
| | string prediction; |
| |
|
| | int i = 0; |
| |
|
| | while (prediction != "[AKHIR]" && i < 50) { |
| | prediction = inferNextWord(newContext); |
| | newContext.push_back(prediction); |
| | i++; |
| | } |
| | cout << "Prediksi kata berikutnya:"; |
| | for (auto m : newContext) { |
| | cout << " " << m; |
| | if (m.find("<unknown word:") != string::npos) |
| | break; |
| | } |
| | cout << endl; |
| |
|
| | return 0; |
| | } |
| |
|