13 #include <initializer_list>
18 #include <unordered_map>
20 #include <unordered_set>
25 extern const std::vector<std::pair<std::string, std::vector<std::string>>>
L33T_TABLE = {
28 {
"c", {
"(",
"{",
"[",
"<"}},
31 {
"i", {
"1",
"!",
"|"}},
32 {
"l", {
"1",
"|",
"7"}},
41 extern const std::vector<std::pair<RegexTag, std::regex>>
REGEXEN = {
47 constexpr std::initializer_list<std::pair<int, int>>
DATE_SPLITS[] = {
74 std::string translate(
const std::string &
string,
75 const std::unordered_map<std::string, std::string> & chr_map) {
77 auto bit = std::back_inserter(toret);
78 toret.reserve(
string.size());
79 for (
auto it =
string.begin(); it !=
string.end();) {
81 auto ch = std::string(it, nextit);
82 auto mit = chr_map.find(ch);
83 if (mit != chr_map.end()) {
86 std::copy(ch.begin(), ch.end(), bit);
93 std::vector<Match> & sorted(std::vector<Match> & matches) {
94 std::sort(matches.begin(), matches.end(),
95 [&] (
const Match & m1,
const Match & m2) ->
bool {
96 return std::make_pair(m1.i, m1.j) < std::make_pair(m2.i, m2.j);
102 std::string dict_normalize(
const std::string & str) {
109 std::vector<Match>
omnimatch(
const std::string & password,
110 const std::vector<std::string> & ordered_list) {
114 ranked_dictionaries.insert(std::make_pair(DictionaryTag::USER_INPUTS,
115 std::cref(ranked_dict)));
117 std::vector<Match> matches;
118 std::function<std::vector<Match>(
const std::string &)> matchers[] = {
120 std::cref(ranked_dictionaries)),
122 std::cref(ranked_dictionaries)),
124 std::cref(ranked_dictionaries), std::cref(
L33T_TABLE)),
132 for (
const auto & matcher : matchers) {
133 auto ret = matcher(password);
134 std::move(ret.begin(), ret.end(), std::back_inserter(matches));
136 return sorted(matches);
145 std::vector<Match> matches;
146 auto len = password.length();
147 auto password_lower = dict_normalize(password);
148 for (
const auto & item : ranked_dictionaries) {
149 auto dictionary_tag = item.first;
150 auto & ranked_dict = item.second;
151 for (decltype(len) i = 0, idx = 0; idx < len;
util::utf8_decode(password, idx), ++i) {
152 for (decltype(len) j = i, jdx = idx; jdx < len; ++j) {
156 auto word = password_lower.substr(idx, jdx - idx);
157 auto it = ranked_dict.find(word);
158 if (it != ranked_dict.end()) {
159 auto rank = it->second;
160 matches.push_back(
Match(i, j, password.substr(idx, jdx - idx),
166 matches.back().idx = idx;
167 matches.back().jdx = jdx;
172 return sorted(matches);
180 for (
auto & match : matches) {
182 match.get_dictionary().reversed =
true;
184 std::tie(match.i, match.j) = std::make_tuple(
188 std::tie(match.idx, match.jdx) = std::make_tuple(
189 password.length() - match.jdx,
190 password.length() - match.idx
193 return sorted(matches);
201 std::unordered_map<std::string, std::vector<std::string>>
relevant_l33t_subtable(
const std::string & password,
const std::vector<std::pair<std::string, std::vector<std::string>>> & table) {
202 std::unordered_map<std::string, std::vector<std::string>> subtable;
203 for (
const auto & item : table) {
204 auto & letter = item.first;
205 auto & subs = item.second;
206 std::vector<std::string> relevant_subs;
207 for (
const auto & sub : subs) {
208 if (password.find(sub) != password.npos) {
209 relevant_subs.push_back(sub);
213 if (relevant_subs.size()) {
214 subtable.insert(std::make_pair(letter, relevant_subs));
221 std::vector<std::unordered_map<std::string, std::string>>
enumerate_l33t_subs(
const std::unordered_map<std::string, std::vector<std::string>> & table) {
222 using SubsType = std::vector<std::vector<std::pair<std::string, std::string>>>;
223 SubsType subs = {{}};
225 auto dedup = [] (
const SubsType & subs) {
227 std::unordered_set<std::string> members;
228 for (
const auto & sub : subs) {
230 std::sort(assoc.begin(), assoc.end());
231 std::ostringstream label_stream;
232 for (
const auto & item : assoc) {
233 label_stream << item.first <<
"," << item.second <<
"-";
235 auto label = label_stream.str();
236 if (members.find(label) == members.end()) {
237 members.insert(std::move(label));
238 deduped.push_back(sub);
244 for (
const auto & item : table) {
245 auto & first_key = item.first;
247 for (
const auto & l33t_chr : item.second) {
248 for (
const auto & sub : subs) {
249 auto sub_alternative = sub;
250 auto it = std::find_if(
251 sub_alternative.begin(), sub_alternative.end(),
252 [&] (
const std::pair<std::string, std::string> & sub_elt) {
253 return sub_elt.first == l33t_chr;
255 if (it == sub_alternative.end()) {
256 sub_alternative.push_back(std::make_pair(l33t_chr, first_key));
257 next_subs.push_back(std::move(sub_alternative));
260 sub_alternative.erase(it);
261 sub_alternative.push_back(std::make_pair(l33t_chr, first_key));
262 next_subs.push_back(sub);
263 next_subs.push_back(std::move(sub_alternative));
267 subs = dedup(next_subs);
271 std::vector<std::unordered_map<std::string, std::string>> sub_dicts;
272 for (
const auto & sub : subs) {
273 sub_dicts.push_back(std::unordered_map<std::string, std::string>
274 (sub.begin(), sub.end()));
282 const std::vector<std::pair<std::string, std::vector<std::string>>> & l33t_table) {
283 std::vector<Match> matches;
285 if (!sub.size())
break;
286 auto subbed_password = translate(password, sub);
287 for (
auto & match :
dictionary_match(subbed_password, ranked_dictionaries)) {
288 auto & dmatch = match.get_dictionary();
289 auto token = password.substr(match.idx, match.jdx - match.idx);
290 if (dict_normalize(token) == dmatch.matched_word) {
295 std::unordered_map<std::string, std::string> match_sub;
296 for (
const auto & item : sub) {
297 auto & subbed_chr = item.first;
298 if (token.find(subbed_chr) == token.npos)
continue;
299 match_sub.insert(item);
303 dmatch.sub = match_sub;
304 std::ostringstream os;
305 std::string sep =
"";
306 for (
const auto & item : match_sub) {
307 os << sep << item.first <<
" -> " << item.second;
312 dmatch.sub_display = os.str();
313 matches.push_back(std::move(match));
317 matches.erase(std::remove_if(matches.begin(), matches.end(), [] (
const Match & a) {
321 return util::character_len(a.token) <= 1;
325 return sorted(matches);
333 std::vector<Match> spatial_match_helper(
const std::string & password,
339 std::vector<Match> matches;
340 for (
const auto & item :
graphs) {
341 auto ret = spatial_match_helper(password, item.second, item.first);
342 std::move(ret.begin(), ret.end(), std::back_inserter(matches));
347 const auto SHIFTED_RX = std::regex(
"[~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:\"ZXCVBNM<>?]");
350 std::vector<Match> spatial_match_helper(
const std::string & password,
353 std::vector<Match> matches;
354 if (!password.length())
return matches;
358 while (i < clen - 1) {
362 auto last_direction = -1;
364 unsigned shifted_count;
365 if ((graph_tag == GraphTag::QWERTY ||
366 graph_tag == GraphTag::DVORAK) &&
367 std::regex_search(password.substr(idx, jdx - idx),
SHIFTED_RX)) {
375 auto prev_char = password.substr(prev_jdx, jdx - prev_jdx);
377 auto found_direction = -1;
378 auto cur_direction = -1;
379 const auto & adjacents = [&] {
380 auto it = graph.find(prev_char);
381 if (it != graph.end()) {
384 return std::vector<optional::optional<std::string>>();
390 auto cur_char = password.substr(jdx, next_jdx - jdx);
391 for (
auto & adj : adjacents) {
393 if (adj && adj->find(cur_char) != adj->npos) {
395 found_direction = cur_direction;
396 if (adj->find(cur_char) == 1) {
403 if (last_direction != found_direction) {
407 last_direction = found_direction;
422 matches.push_back(Match(i, j - 1, password.substr(idx, jdx - idx),
424 graph_tag, turns, shifted_count,
426 matches.back().idx = idx;
427 matches.back().jdx = jdx;
444 std::vector<Match> matches;
445 std::regex greedy(R
"((.+)\1+)");
446 std::regex lazy(R"((.+?)\1+)");
447 std::regex lazy_anchored(R"(^(.+?)\1+$)");
449 while (lastIndex < password.length()) {
450 auto start_iter = lastIndex + password.begin();
451 std::smatch greedy_match, lazy_match;
452 std::regex_search(start_iter, password.end(),
453 greedy_match, greedy);
454 std::regex_search(start_iter, password.end(),
456 if (!greedy_match.size())
break;
458 std::string base_token;
459 if (greedy_match[0].length() > lazy_match[0].length()) {
463 match = greedy_match;
468 std::smatch lazy_anchored_match;
469 auto greedy_found = match.str(0);
470 auto ret = std::regex_search(greedy_found, lazy_anchored_match, lazy_anchored);
473 base_token = lazy_anchored_match.str(1);
479 match = std::move(lazy_match);
480 base_token = match.str(1);
482 auto idx = lastIndex + match.position();
483 auto jdx = lastIndex + match.position() + match[0].length();
487 auto sub_matches =
omnimatch(base_token);
493 std::vector<Match> base_matches;
494 std::move(base_analysis.sequence.begin(), base_analysis.sequence.end(),
495 std::back_inserter(base_matches));
496 auto & base_guesses = base_analysis.guesses;
497 matches.push_back(
Match(i, j, match.str(0),
501 std::move(base_matches),
502 match[0].length() / base_token.length(),
504 matches.back().idx = idx;
505 matches.back().jdx = jdx;
528 std::vector<Match> result;
530 using delta_t = std::int32_t;
533 if (j - i > 1 || std::abs(delta) == 1) {
534 if (0 < std::abs(delta) && std::abs(delta) <=
MAX_DELTA) {
535 auto token = password.substr(idx, jdx - idx);
537 unsigned sequence_space;
538 if (std::regex_search(token, std::regex(R
"(^[a-z]+$)"))) {
539 sequence_name = SequenceTag::LOWER;
542 else if (std::regex_search(token, std::regex(R
"(^[A-Z]+$)"))) {
543 sequence_name = SequenceTag::UPPER;
546 else if (std::regex_search(token, std::regex(R
"(^\d+$)"))) {
547 sequence_name = SequenceTag::DIGITS;
551 sequence_name = SequenceTag::UNICODE;
554 result.push_back(
Match(i, j, token,
557 result.back().idx = idx;
558 result.back().jdx = jdx;
563 if (!password.size())
return result;
565 decltype(password.length()) i = 0;
566 decltype(password.length()) idx = 0;
568 decltype(password.length()) kdx = 0;
571 for (
idx_t k = 1; kdx < password.length(); ++k) {
574 assert(kdx != next_kdx);
575 delta_t delta = cp - last_cp;
576 if (!maybe_last_delta) {
577 maybe_last_delta = delta;
579 if (delta != *maybe_last_delta) {
582 update(i, j, idx, jdx, *maybe_last_delta);
585 maybe_last_delta = delta;
591 if (maybe_last_delta) {
593 idx, password.size(), *maybe_last_delta);
604 const std::vector<std::pair<RegexTag, std::regex>> & regexen) {
605 std::vector<Match> matches;
606 for (
const auto & item : regexen) {
607 auto tag = item.first;
608 auto & regex = item.second;
609 std::smatch rx_match;
610 std::size_t lastIndex = 0;
613 auto token = rx_match.str(0);
614 auto idx = lastIndex + rx_match.position();
615 auto jdx = lastIndex + rx_match.position() + rx_match[0].length();
616 assert(token == password.substr(idx, jdx - idx));
619 matches.push_back(
Match(i, j,
622 matches.back().idx = idx;
623 matches.back().jdx = jdx;
624 lastIndex += rx_match[0].length();
627 return sorted(matches);
644 date_t stou(
const std::string & a) {
645 return static_cast<date_t>(std::stoul(a));
648 std::vector<Match>
date_match(
const std::string & password) {
667 std::vector<Match> matches;
668 std::regex maybe_date_no_separator(R
"(^\d{4,8}$)");
669 std::regex maybe_date_with_separator(R"(^(\d{1,4})([\s/\\_.-])(\d{1,2})\2(\d{1,4})$)");
672 std::vector<std::string::size_type> offsets;
675 for (
auto i = 0; i < 9; ++i) {
676 offsets.push_back(idx_dot);
677 if (idx_dot >= password.length()) {
682 assert(offsets.size());
683 for (decltype(password.length()) i = 0; offsets.size() - 1 >= 4; ++i) {
684 auto idx = offsets[0];
685 for (decltype(i) offset = 3; offset <= 7 && offset < offsets.size() - 1; ++offset) {
687 auto jdx = offsets[offset + 1];
689 auto token = password.substr(idx, jdx - idx);
690 auto token_chr_len = j - i + 1;
692 if (!std::regex_search(token, maybe_date_no_separator))
continue;
693 std::vector<DMY> candidates;
694 for (
const auto & item :
DATE_SPLITS[token_chr_len - 4]) {
696 auto l = item.second;
697 auto kdx = offsets[k] - idx;
698 auto ldx = offsets[l] - idx;
700 auto dmy = map_ints_to_dmy(std::array<date_t, 3>{{
701 stou(token.substr(0, kdx)),
702 stou(token.substr(kdx, ldx - kdx)),
703 stou(token.substr(ldx))}});
704 if (dmy) candidates.push_back(*dmy);
706 if (!candidates.size())
continue;
713 auto metric = [] (
const DMY & candidate) {
721 auto best_candidate = *std::min_element(candidates.begin(), candidates.end(),
722 [=] (
const DMY & a,
const DMY &
b) {
723 return metric(a) < metric(b);
725 matches.push_back(
Match(i, j, token,
728 best_candidate.month,
732 matches.back().idx = idx;
733 matches.back().jdx = jdx;
736 offsets.erase(offsets.begin());
737 if (offsets.back() < password.length()) {
738 auto idx2 = offsets.back();
740 offsets.push_back(idx2);
748 for (
auto i = 0; i < 11; ++i) {
749 offsets.push_back(idx_dot);
750 if (idx_dot >= password.length()) {
755 assert(offsets.size());
756 for (decltype(password.length()) i = 0; offsets.size() - 1 >= 6; ++i) {
757 auto idx = offsets[0];
758 for (decltype(password.length()) offset = 5; offset <= 9 && offset < offsets.size() - 1; ++offset) {
760 auto jdx = offsets[offset + 1];
761 auto token = password.substr(idx, jdx - idx);
762 std::smatch rx_match;
766 auto dmy = map_ints_to_dmy(std::array<date_t, 3>{{
769 stou(rx_match[4])}});
771 matches.push_back(
Match(i, j, token,
778 matches.back().idx = idx;
779 matches.back().jdx = jdx;
782 offsets.erase(offsets.begin());
783 if (offsets.back() < password.length()) {
784 auto idx2 = offsets.back();
786 offsets.push_back(idx2);
797 matches.erase(std::remove_if(matches.begin(), matches.end(), [&] (
const Match & match) {
798 for (auto & other_match : matches) {
799 if (other_match.i == match.i && other_match.j == match.j) continue;
800 if (other_match.i <= match.i && other_match.j >= match.j) {
808 return sorted(matches);
812 optional::optional<DMY> map_ints_to_dm(
const std::array<date_t, 2> & vals);
818 optional::optional<DMY> map_ints_to_dmy(
const std::array<date_t, 3> & vals) {
827 if (vals[1] > 31 || vals[1] == 0)
return optional::nullopt;
831 for (
auto val : vals) {
833 if (val > 31) over_31 += 1;
834 if (val > 12) over_12 += 1;
835 if (val <= 0) under_1 += 1;
837 if (over_31 >= 2 || over_12 == 3 || under_1 >= 2)
return optional::nullopt;
840 std::pair<date_t, std::array<date_t, 2>> possible_year_splits[] = {
841 {vals[2], {{vals[0], vals[1]}}},
842 {vals[0], {{vals[1], vals[2]}}},
844 for (
const auto & item : possible_year_splits) {
845 auto & y = item.first;
846 auto & rest = item.second;
848 auto dm = map_ints_to_dm(rest);
850 return DMY{y, dm->month, dm->day};
856 return optional::nullopt;
863 for (
const auto & item : possible_year_splits) {
865 auto & rest = item.second;
866 auto dm = map_ints_to_dm(rest);
868 y = two_to_four_digit_year(y);
869 return DMY{y, dm->month, dm->day};
873 return optional::nullopt;
877 optional::optional<DMY> map_ints_to_dm(
const std::array<date_t, 2> & vals) {
878 for (
const auto & item : {vals, {{vals[1], vals[0]}}}) {
879 auto d = item[0], m = item[1];
880 if (1 <= d && d <= 31 && 1 <= m && m <= 12) {
884 return optional::nullopt;
892 else if (year > 50) {