Hack a LC hard problem step by step
2306. Naming a Company
2306. Naming a Company
This problem looks simple, however, it is pretty tricky as the naive one is too complicated.
To begin with, we can naively simulating the whole process. A trick used here is using a 26 bit mask to see which character can be converted.
Code version 1 (TLE)
class Solution {
public:
long long distinctNames(vector<string>& ideas) {
const int n = ideas.size();
long long ret = 0;
if (n <= 1) return ret;
unordered_set<string> ideas_set(ideas.begin(), ideas.end());
vector<vector<pair<string, int>>> first_letter_mask(26);
vector<string> words;
for (auto &idea_: ideas){
string idea = idea_;
int idx = idea[0] - 'a';
int letter_mask = 0;
for (int i = 0; i < 26; ++i){
if (i == idx)continue;
idea[0] = 'a' + i;
if (ideas_set.find(idea) == ideas_set.end()){
letter_mask |= (1<<i);
}
}
first_letter_mask[idx].emplace_back(idea_, letter_mask);
}
for (int i = 0; i < 26; ++i){
for (auto [word_i_, mask_i]: first_letter_mask[i]){
for (int j = 0; j< 26; ++j){
if (i == j) continue;
for (auto [word_j_, mask_j]: first_letter_mask[j]){
if ((mask_i & (1<<j)) && (mask_j & (1<<i))){
string word_i = word_i_;
string word_j = word_j_;
word_i[0] = 'a' + j;
word_j[0] = 'a' + i;
words.push_back(word_i + ' ' + word_j);
}
}
}
}
}
sort(words.begin(), words.end());
long long uniqueCount = unique(words.begin(), words.end()) - words.begin();
return uniqueCount;
}
};
Unfortunately, got TLE. Still it can have 73 out of 89 test cases passed. Not too bad.
Code version 2 (still got TLE)
Optimize the memory by using vector<vector<pair<int, int>>> instead of vector<vector<pair<string, int>>> as we only need save the ideas index.
class Solution {
public:
long long distinctNames(vector<string>& ideas) {
const int n = ideas.size();
long long ret = 0;
if (n <= 1) return ret;
unordered_set<string> ideas_set(ideas.begin(), ideas.end());
vector<vector<pair<int, int>>> first_letter_mask(26);
vector<string> words;
for (int i = 0; i < n; ++i){
string idea = ideas[i];
int idx = idea[0] - 'a';
int letter_mask = 0;
for (int j= 0; j< 26; ++j){
if (j == idx)continue;
idea[0] = 'a' + j;
if (ideas_set.find(idea) == ideas_set.end()){
letter_mask |= (1<<j);
}
}
first_letter_mask[idx].emplace_back(i, letter_mask);
}
for (int i = 0; i < 26; ++i){
for (auto [word_i_idx, mask_i]: first_letter_mask[i]){
for (int j = 0; j< 26; ++j){
if (i == j) continue;
for (auto [word_j_idx, mask_j]: first_letter_mask[j]){
if ((mask_i & (1<<j)) && (mask_j & (1<<i))){
string word_i = ideas[word_i_idx];
string word_j = ideas[word_j_idx];
word_i[0] = 'a' + j;
word_j[0] = 'a' + i;
words.push_back(word_i + ' ' + word_j);
}
}
}
}
}
sort(words.begin(), words.end());
long long uniqueCount = unique(words.begin(), words.end()) - words.begin();
return uniqueCount;
}
};
For this one, still TLE
Code version 3 (TLE, however, got one more case passed)
class Solution {
public:
long long distinctNames(vector<string>& ideas) {
const int n = ideas.size();
long long ret = 0;
//cout << n << endl;
//return ret;
if (n <= 1) return ret;
unordered_set<string> ideas_set(ideas.begin(), ideas.end());
vector<vector<pair<int, int>>> first_letter_mask(26);
vector<string> words;
for (int i = 0; i < n; ++i){
string idea = ideas[i];
int idx = idea[0] - 'a';
int letter_mask = 0;
for (int j= 0; j< 26; ++j){
if (j== idx)continue;
idea[0] = 'a' + j;
if (ideas_set.find(idea) == ideas_set.end()){
letter_mask |= (1<<j);
}
}
first_letter_mask[idx].emplace_back(i, letter_mask);
}
for (int i = 0; i < 26; ++i){
for (auto [word_i_idx, mask_i]: first_letter_mask[i]){
for (int j = 0; j< 26; ++j){
if (i == j) continue;
for (auto [word_j_idx, mask_j]: first_letter_mask[j]){
if ((mask_i & (1<<j)) && (mask_j & (1<<i))){
string word_i = ideas[word_i_idx];
string word_j = ideas[word_j_idx];
word_i[0] = 'a' + j;
word_j[0] = 'a' + i;
words.push_back(word_i + ' ' + word_j);
}
}
}
}
}
return words.size();
//sort(words.begin(), words.end());
//long long uniqueCount = unique(words.begin(), words.end()) - words.begin();
//return uniqueCount;
}
};
Actually, the last 3 steps are not needed, as for partA_partB, both partA and partB are new, so partA_partB should be new. Also, partB_partA != partA_partB as all elements are unique.
Then we can pass one more case :(
When checking the failed case 74, the ideas length is 8544, and the expected results is: 58,289,912, which is about 58M. Saving those combination words into a vector can be super slow and actually it is not necessary to do that.
Code version 4 (TLE, however, got 10 more cases passed)
class Solution {
public:
long long distinctNames(vector<string>& ideas) {
const int n = ideas.size();
long long ret = 0;
if (n <= 1) return ret;
unordered_set<string> ideas_set(ideas.begin(), ideas.end());
vector<vector<pair<int, int>>> first_letter_mask(26);
vector<string> words;
for (int i = 0; i < n; ++i){
string idea = ideas[i];
int idx = idea[0] - 'a';
int letter_mask = 0;
for (int j= 0; j< 26; ++i){
if (j == idx)continue;
idea[0] = 'a' + j;
if (ideas_set.find(idea) == ideas_set.end()){
letter_mask |= (1<<j);
}
}
first_letter_mask[idx].emplace_back(i, letter_mask);
}
for (int i = 0; i < 26; ++i){
for (auto [word_i_idx, mask_i]: first_letter_mask[i]){
for (int j = 0; j< 26; ++j){
if (i == j) continue;
for (auto [word_j_idx, mask_j]: first_letter_mask[j]){
if ((mask_i & (1<<j)) && (mask_j & (1<<i))){
ret ++;
}
}
}
}
}
return ret;
}
};
When checking the code again, the word_i_idx and word_j_idx are not needed.
Code version 5 (TLE, however, got no difference compared with previous one)
class Solution {
public:
long long distinctNames(vector<string>& ideas) {
const int n = ideas.size();
long long ret = 0;
if (n <= 1) return ret;
unordered_set<string> ideas_set(ideas.begin(), ideas.end());
vector<vector<int>> first_letter_mask(26);
vector<string> words;
for (int i = 0; i < n; ++i){
string idea = ideas[i];
int idx = idea[0] - 'a';
int letter_mask = 0;
for (int i = 0; i < 26; ++i){
if (i == idx)continue;
idea[0] = 'a' + i;
if (ideas_set.find(idea) == ideas_set.end()){
letter_mask |= (1<<i);
}
}
first_letter_mask[idx].emplace_back(letter_mask);
}
for (int i = 0; i < 26; ++i){
for (auto &mask_i: first_letter_mask[i]){
for (int j = 0; j< 26; ++j){
if (i == j) continue;
for (auto &mask_j: first_letter_mask[j]){
if ((mask_i & (1<<j)) && (mask_j & (1<<i))){
ret ++;
}
}
}
}
}
return ret;
}
};
when checking the test case 84: the idea size is 37_378, the expected answer is 1_136_333_536. From the result, we know that naive counting is impossible to pass. We need a smarter way. But slightly code change might be helpful if we cut the problem size by half.
Code version 6 (TLE, however, got 1 more case passed)
class Solution {
public:
long long distinctNames(vector<string>& ideas) {
const int n = ideas.size();
//cout << n << endl;
long long ret = 0;
if (n <= 1) return ret;
//return ret;
unordered_set<string> ideas_set(ideas.begin(), ideas.end());
vector<vector<int>> first_letter_mask(26);
vector<string> words;
for (int i = 0; i < n; ++i){
string idea = ideas[i];
int idx = idea[0] - 'a';
int letter_mask = 0;
for (int j = 0; j < 26; ++j){
if (j == idx)continue;
idea[0] = 'a' + j;
if (ideas_set.find(idea) == ideas_set.end()){
letter_mask |= (1<<j);
}
}
first_letter_mask[idx].emplace_back(letter_mask);
}
for (int i = 0; i < 26; ++i){
for (auto &mask_i: first_letter_mask[i]){
// Code update 1: j = 0 -> j = i + 1
for (int j = i + 1; j< 26; ++j){
if (i == j) continue;
for (auto &mask_j: first_letter_mask[j]){
if ((mask_i & (1<<j)) && (mask_j & (1<<i))){
// Code update 2: ret += -> ret += 2
ret += 2;
}
}
}
}
}
return ret;
}
};
Code version 7 (AC)
Since naive one does not work, we must improve the complexity. All the previous solution are in the time complexity of O(n²), which is why it can not AC. We need a faster one.
Actually, we can aggregate the number of words which is allowed for an idea to be converted to. Then the previous O(n²) step’s complexity reduced to O(26²). See the last several steps of the code. And then the whole solution’s complexity is O(n*26) see the first part of the code. The problem is solved.
class Solution {
public:
long long distinctNames(vector<string>& ideas) {
const int n = ideas.size();
long long ret = 0;
if (n <= 1) return ret;
unordered_set<string> ideas_set(ideas.begin(), ideas.end());
vector<vector<int>> cnt_based_first_letter(26, vector<int>(26, 0));
auto& cnt = cnt_based_first_letter;
vector<string> words;
for (int i = 0; i < n; ++i){
string idea = ideas[i];
int idx = idea[0] - 'a';
for (int j = 0; j < 26; ++j){
if (j == idx)continue;
idea[0] = 'a' + j;
if (ideas_set.find(idea) == ideas_set.end()){
cnt[idx][j]++;
}
}
}
for (int i = 0; i < 26; ++i){
for (int j = i + 1; j< 26; ++j){
ret += 2*(cnt[i][j]*cnt[j][i]);
}
}
return ret;
}
};
Python version
class Solution:
def distinctNames(self, ideas: List[str]) -> int:
if len(ideas) < 2:return 0
ideas_set = set(ideas)
cnt_based_first_letter = [[0 for _ in range(26)] for _ in range(26)]
cnt = cnt_based_first_letter
for idea in ideas:
idx = ord(idea[0]) - ord('a')
for i, c in enumerate("abcdefghijklmnopqrstuvwxyz"):
if c + idea[1:] not in ideas_set:
cnt[idx][i] += 1
return 2*sum(cnt[i][j]*cnt[j][i] for i in range(26) for j in range(i + 1, 26))
Take aways
It’s pretty fine to conquer the problem step by step. What I learned from this one is:
- When the return type is long long, be careful. The naive solution might be TLE
- When n is about 5*10⁴, O(n²) solution should not work.
- The only 26 letters or maximum word length is 10 suggests that the complexity should be O(26*n) or O(10*n) or maybe O(n*10²)