下面是 ChatGPT 3.5 第一次给出的代码,勉强能用(只是保存出来的文件里有乱码),后来越改越不行。请大佬指点。环境:Ubuntu,C++。
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
// Escapes one vocabulary token so it can be stored as a single field of a
// tab-separated UTF-8 text file.
//
// Well-formed multi-byte UTF-8 sequences and printable ASCII are copied
// unchanged. Everything that would corrupt the file is escaped:
//   backslash -> "\\\\", tab -> "\\t", newline -> "\\n", carriage return -> "\\r",
//   any other control byte or any byte that is not part of a well-formed
//   UTF-8 sequence -> "\\xHH" (two upper-case hex digits).
// Without this, tokens that are raw byte fragments or contain separators make
// the dump look garbled and break the one-record-per-line layout.
static std::string escapeTokenForDump(const std::string& token) {
    static const char kHex[] = "0123456789ABCDEF";

    std::string out;
    out.reserve(token.size());

    const std::size_t size = token.size();
    std::size_t i = 0;
    while (i < size) {
        const unsigned char lead = static_cast<unsigned char>(token[i]);

        // Expected length of the UTF-8 sequence announced by the lead byte;
        // 0 means the byte cannot start a well-formed sequence.
        std::size_t len = 0;
        if (lead < 0x80) {
            len = 1;
        } else if (lead >= 0xC2 && lead <= 0xDF) {
            len = 2;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            len = 3;
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            len = 4;
        }

        if (len > 1) {
            // All continuation bytes must be present and of the form 10xxxxxx.
            bool wellFormed = (i + len <= size);
            for (std::size_t k = 1; wellFormed && k < len; ++k) {
                const unsigned char c = static_cast<unsigned char>(token[i + k]);
                wellFormed = ((c & 0xC0) == 0x80);
            }
            if (wellFormed) {
                // Reject overlong encodings, UTF-16 surrogates and code points
                // above U+10FFFF by range-checking the second byte.
                const unsigned char second = static_cast<unsigned char>(token[i + 1]);
                if ((lead == 0xE0 && second < 0xA0) || (lead == 0xED && second > 0x9F) ||
                    (lead == 0xF0 && second < 0x90) || (lead == 0xF4 && second > 0x8F)) {
                    wellFormed = false;
                }
            }
            if (wellFormed) {
                out.append(token, i, len);
                i += len;
                continue;
            }
            len = 0;  // fall through and escape just the offending lead byte
        }

        switch (lead) {
            case '\\': out += "\\\\"; break;
            case '\t': out += "\\t"; break;
            case '\n': out += "\\n"; break;
            case '\r': out += "\\r"; break;
            default:
                if (len == 1 && lead >= 0x20 && lead != 0x7F) {
                    out += static_cast<char>(lead);
                } else {
                    out += "\\x";
                    out += kHex[lead >> 4];
                    out += kHex[lead & 0x0F];
                }
                break;
        }
        ++i;
    }
    return out;
}

// Dumps the index -> token table to "idx2word.txt" in the current directory.
//
// Output: one "<index>\t<escaped token>\n" record per entry, sorted by index
// so that successive dumps are stable and diffable. Tokens are escaped with
// escapeTokenForDump, so the file is valid UTF-8 with exactly one record per
// line. If the file cannot be opened, nothing is written (best-effort dump).
void saveIdx2Word(const std::unordered_map<int, std::string>& idx2word) {
    std::ofstream file("idx2word.txt", std::ios::out | std::ios::binary);
    if (!file.is_open()) {
        return;
    }

    typedef std::pair<int, const std::string*> Row;
    std::vector<Row> rows;
    rows.reserve(idx2word.size());
    for (const auto& entry : idx2word) {
        rows.emplace_back(entry.first, &entry.second);
    }
    // Keys are unique, so ordering by index alone is deterministic.
    std::sort(rows.begin(), rows.end(),
              [](const Row& a, const Row& b) { return a.first < b.first; });

    for (const Row& row : rows) {
        file << row.first << '\t' << escapeTokenForDump(*row.second) << '\n';
    }
}

// Dumps the token -> index table to "word2idx.txt" in the current directory.
//
// Output: one "<escaped token>\t<index>\n" record per entry, sorted by the raw
// bytes of the token so that successive dumps are stable and diffable. Tokens
// are escaped with escapeTokenForDump. If the file cannot be opened, nothing
// is written (best-effort dump).
void saveWord2Idx(const std::unordered_map<std::string, int>& word2idx) {
    std::ofstream file("word2idx.txt", std::ios::out | std::ios::binary);
    if (!file.is_open()) {
        return;
    }

    typedef const std::pair<const std::string, int>* RowPtr;
    std::vector<RowPtr> rows;
    rows.reserve(word2idx.size());
    for (const auto& entry : word2idx) {
        rows.push_back(&entry);
    }
    // Keys are unique, so ordering by token alone is deterministic.
    std::sort(rows.begin(), rows.end(),
              [](RowPtr a, RowPtr b) { return a->first < b->first; });

    for (RowPtr row : rows) {
        file << escapeTokenForDump(row->first) << '\t' << row->second << '\n';
    }
}
// Constructs the tokenizer from a msgpack object.
//
// The object is first read as a string-keyed map with an "idx2word" entry and
// optional "type", "normalizer" and "pre_tokenizer" entries. If any step of
// that throws a std::exception, the object is instead read directly as an
// index -> token map (legacy world tokenizer format). The reverse table, the
// debug dump files and the trie are then built from the loaded vocabulary.
NormalTokenizer::NormalTokenizer(msgpack::object obj) : TokenizerBase(0, 0, 0) {
    try {
        // Original author's note: this conversion is suspected to be where an
        // exception is thrown.
        auto dict = obj.as<std::unordered_map<std::string, msgpack::object>>();

        if (dict.count("type") != 0) {
            RV_CHECK(dict["type"].as<std::string>() == "NormalTokenizer");
        }

        _idx2word = dict["idx2word"].as<std::unordered_map<int, std::string>>();

        const auto normalizerIt = dict.find("normalizer");
        if (normalizerIt != dict.end()) {
            _normalizer = normalizerIt->second.as<std::string>();
        }

        const auto preTokenizerIt = dict.find("pre_tokenizer");
        if (preTokenizerIt != dict.end()) {
            _pre_tokenizer = preTokenizerIt->second.as<std::string>();
        }
    } catch (const std::exception &) {
        // legacy world tokenizer format
        _idx2word = obj.as<std::unordered_map<int, std::string>>();
    }

    // Build the reverse (token -> index) table from the loaded vocabulary.
    for (auto it = _idx2word.begin(); it != _idx2word.end(); ++it) {
        _word2idx[it->second] = it->first;
    }

    // Dump both tables to text files.
    saveIdx2Word(_idx2word);
    saveWord2Idx(_word2idx);

    // Trie built over the token -> index table.
    _tree = std::make_unique<TrieTree>(_word2idx);
}
chatgpt3.5越改越不对,大佬有好法子吗?谢谢
--
FROM 120.242.238.*