最大产品preFIX字符串 [英] Maximum product prefix string

查看：111 发布时间：2015/11/30 15:01:33 algorithm performance dynamic-programming kadanes-algorithm

本文介绍了最大产品preFIX字符串的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

以下是从编码采访网站名为codility演示问题：

字符串S的preFIX是S.任何导致相邻的一部分。例如，C和鳕鱼的字符串codility的prefixes。为简单起见，我们要求prefixes是非空的。

串S的preFIX磷产物是出现的P通过P的长度乘以更多precisely的数目，如果preFIX P由K个字符和P恰好出现Ť倍在S，那么该产品等于K *吨。

例如，S =ABABABA具有以下prefixes：


一个，其乘积等于1 * 4 = 4，

在AB，其乘积等于2 * 3 = 6，

在阿坝，其乘积等于3 * 3 = 9，

在ABAB，其乘积等于4 * 2 = 8，

在巴，其乘积等于5 * 2 = 10，

在ABABAB，其产品等于6 * 1 = 6，

在ABABABA，其乘积等于7 * 1 = 7。


最长的preFIX是相同的原始字符串。我们的目标是选择这样的preFIX作为最大化了产品的价值。在上面的例子中的最大的产品是10

下面是我可怜的解决方案，在Java中需要O（N ^ 2）的时间。这显然是有可能做到这一点的O（N）。我在想Kadanes算法。但我想不出什么办法，我可以连接code一些信息，在每一步，让我找到了正在运行的最大。任何一个可以想到的O（N）算法的呢？

进口的java.util.HashMap;

一流的解决方案{
    公众诠释溶液（字符串s）{
        INT N = S.length（）;
        如果（N小于1 || N'GT; 300000）{
            的System.out.println（无效的长度）;
            返程（-1）;
        }
        HashMap的＆LT;字符串，整数＆GT; prefixes =新的HashMap＆LT;字符串，整数＆GT;（）;
        的for（int i = 0; I＆n种;我++）{
            字符串keystr =;
            对于（INT J =; J＆GT; = 0; j--）{
                keystr + = S.charAt（J）;
                如果（！prefixes.containsKey（keystr））
                    prefixes.put（keystr，keystr.length（））;
                其他{
                    INT的newval = prefixes.get（keystr）+ keystr.length（）;
                    如果（的newval＆GT; 10亿）收益10亿;
                    prefixes.put（keystr，的newval）;
                }
            }
        }
        INT maax1 = 0;
        对于（INT VAL：prefixes.values（））
            如果（VAL＆GT; maax1）
                maax1 = VAL;
        返回maax1;
    }
}

解决方案

下面是一个O之类的后缀阵列（N log n）的版本。还有的后缀数组为O（n）构造算法，我只是没有耐心code它们。

示例输出（这个输出是不为O（n），但它只是表明我们的确可以计算出所有的分数）：

  4 * 1
3 * 3 ABA
2 * 5贝巴
1 * 7 ABABABA
3 * 2从头
2 * 4 ABAB
1 * 6 ABABAB

基本上，你必须扭转字符串，并计算后缀阵列（SA）和最长的共同preFIX（LCP）。

然后你必须遍历数组SA向后寻找符合整个后缀（preFIX在原来的字符串）的液晶聚合物。如果有一个匹配，增加计数器，否则重置为1。每个后缀（preFIX）收到一个分数（SCR），对应于它出现在原始字符串的次数。

的#include＆LT;的iostream＆GT;
＃包括＆LT; CString的GT;
＃包括＆LT;字符串＆GT;
＃定义MAX 10050
使用名字空间std;

INT RA [MAX]，坦普拉[MAX]
INT SA [MAX]，tempSA [MAX]
INT C [MAX];
INT披[MAX]，PLCP [MAX]，LCP [MAX]

INT SCR [MAX]

无效suffix_sort（INT N，INT K）{
    memset的（C，0，sizeof的C）;

    的for（int i = 0;我n种;我++）
        C [I + K＆LT; N + RA [I + K]：0] ++;

    INT总和= 0;
    的for（int i = 0; I＆LT;最大（256，N）;我++）{
        INT T = C [I];
        C [i] =总和;
        总和+ = T;
    }

    的for（int i = 0;我n种;我++）
        tempSA [C [SA [I] + K＆LT; N + RA的[SA [i]于+ K]：0] ++] = SA [i]于;

    的memcpy（SA，tempSA，N *的sizeof（INT））;
}

无效suffix_array（串放大器; S）{
    INT N = s.size（）;

    的for（int i = 0;我n种;我++）
        RA的[I] = S [I]  -  1;

    的for（int i = 0;我n种;我++）
        SA [我] =我;

    为（中间体K = 1; K＆n种; K * = 2）{
        suffix_sort（N，K）;
        suffix_sort（N，0）;

        INT R =坦普拉[SA [0]] = 0;
        的for（int i = 1;我n种;我++）{
            INT S1 = SA [I]中，S2 = SA [I-1];
            布尔等于= TRUE;
            等于＆安培; = RA [S1] == RA [S2];
            等于＆安培; = RA [S1 + K] == RA [S2 + k]的;

            坦普拉[SA [I] =平等吗？ R：++ R;
        }

        的memcpy（RA，坦普拉，N *的sizeof（INT））;
    }
}

无效LCP（串放大器; S）{
    INT N = s.size（）;

    披[SA [0]] = -1;
    的for（int i = 1;我n种;我++）
        披[SA [I] = SA [I-1];

    INT L = 0;
    的for（int i = 0;我n种;我++）{
        如果（披[我] == -1）{
            PLCP [I] = 0;
            继续;
        }
        而（S [I + 1] == S [披[I] + 1]）
            大号++;

        PLCP [I] = L;
        L =最大值（L-1，0）;
    }

    的for（int i = 1;我n种;我++）
        LCP的[I] = PLCP [SA [I]];
}

空分（串放大器; S）{
    SCR [s.size（） -  1] = 1;

    INT总和= 1;
    的for（int i = s.size（） -  2; I＆GT; = 0;我 - ）{
        如果（LCP [I + 1]; s.size（） -  SA [Ⅰ] -1）{
            总和= 1;
        } 其他 {
            综上所述++;
        }
        SCR [I] =总和;
    }
}

诠释的main（）{
    字符串s =ABABABA;
    S =字符串（s.rbegin（），s.rend（））+。;

    suffix_array（多个）;
    LCP（S）;
    得分（多个）;

    的for（int i = 0; I＆LT; s.size（）;我++）{
        串纳秒= s.substr（SA [I]，s.size（） -  SA [Ⅰ] -1）;
        NS =字符串（ns.rbegin（），ns.rend（））;
        COUT＆LT;＆LT; SCR [1]  - ;＆其中; *＆其中;＆其中; ns.size（）＆其中;＆其中; ＆LT;＆LT; NS＆LT;＆LT; ENDL;
    }
}

这其中大部分code（特别是后缀阵列和LCP实现）我一直在使用了几年的比赛。这个版本在特殊的，我从这一个我几年前写了。

The following is a demo question from a coding interview site called codility:

A prefix of a string S is any leading contiguous part of S. For example, "c" and "cod" are prefixes of the string "codility". For simplicity, we require prefixes to be non-empty.

The product of prefix P of string S is the number of occurrences of P multiplied by the length of P. More precisely, if prefix P consists of K characters and P occurs exactly T times in S, then the product equals K * T.

For example, S = "abababa" has the following prefixes:

"a", whose product equals 1 * 4 = 4,

"ab", whose product equals 2 * 3 = 6,

"aba", whose product equals 3 * 3 = 9,

"abab", whose product equals 4 * 2 = 8,

"ababa", whose product equals 5 * 2 = 10,

"ababab", whose product equals 6 * 1 = 6,

"abababa", whose product equals 7 * 1 = 7.

The longest prefix is identical to the original string. The goal is to choose such a prefix as maximizes the value of the product. In above example the maximal product is 10.

Below is my poor solution in Java requiring O(N^2) time. It is apparently possible to do this in O(N). I was thinking Kadanes algorithm. But I can't think of any way that I can encode some information at each step that lets me find the running max. Can any one think of an O(N) algorithm for this?

import java.util.HashMap;

class Solution {
    public int solution(String S) {
        int N = S.length();
        if(N<1 || N>300000){
            System.out.println("Invalid length");
            return(-1);
        }
        HashMap<String,Integer> prefixes = new HashMap<String,Integer>();
        for(int i=0; i<N; i++){
            String keystr = "";
            for(int j=i; j>=0; j--) {
                keystr += S.charAt(j);
                if(!prefixes.containsKey(keystr))
                    prefixes.put(keystr,keystr.length());
                else{
                    int newval = prefixes.get(keystr)+keystr.length();
                    if(newval > 1000000000)return 1000000000;
                    prefixes.put(keystr,newval);
                }
            }
        }
        int maax1 = 0;
        for(int val : prefixes.values())
            if(val>maax1)
                maax1 = val;
        return maax1;
    }
}

解决方案

Here's a O(n log n) version based on suffix arrays. There are O(n) construction algorithms for suffix arrays, I just don't have the patience to code them.

Example output (this output isn't O(n), but it's only to show that we can indeed compute all the scores):

4*1 a
3*3 aba
2*5 ababa
1*7 abababa
3*2 ab
2*4 abab
1*6 ababab

Basically you have to reverse the string, and compute the suffix array (SA) and the longest common prefix (LCP).

Then you have traverse the SA array backwards looking for LCPs that match the entire suffix (prefix in the original string). If there's a match, increment the counter, otherwise reset it to 1. Each suffix (prefix) receive a "score" (SCR) that corresponds to the number of times it appears in the original string.

#include <iostream>
#include <cstring>
#include <string>
#define MAX 10050
using namespace std;

int RA[MAX], tempRA[MAX];
int SA[MAX], tempSA[MAX];
int C[MAX];                
int Phi[MAX], PLCP[MAX], LCP[MAX];

int SCR[MAX];

void suffix_sort(int n, int k) {
    memset(C, 0, sizeof C);        

    for (int i = 0; i < n; i++)        
        C[i + k < n ? RA[i + k] : 0]++;

    int sum = 0;
    for (int i = 0; i < max(256, n); i++) {                     
        int t = C[i]; 
        C[i] = sum; 
        sum += t;
    }

    for (int i = 0; i < n; i++)        
        tempSA[C[SA[i] + k < n ? RA[SA[i] + k] : 0]++] = SA[i];

    memcpy(SA, tempSA, n*sizeof(int));
}

void suffix_array(string &s) {             
    int n = s.size();

    for (int i = 0; i < n; i++) 
        RA[i] = s[i] - 1;              

    for (int i = 0; i < n; i++) 
        SA[i] = i;

    for (int k = 1; k < n; k *= 2) {     
        suffix_sort(n, k);
        suffix_sort(n, 0);

        int r = tempRA[SA[0]] = 0;
        for (int i = 1; i < n; i++) {
            int s1 = SA[i], s2 = SA[i-1];
            bool equal = true;
            equal &= RA[s1] == RA[s2];
            equal &= RA[s1+k] == RA[s2+k];

            tempRA[SA[i]] = equal ? r : ++r;     
        }

        memcpy(RA, tempRA, n*sizeof(int));
    } 
}

void lcp(string &s) {
    int n = s.size();

    Phi[SA[0]] = -1;         
    for (int i = 1; i < n; i++)  
        Phi[SA[i]] = SA[i-1];  

    int L = 0;
    for (int i = 0; i < n; i++) {
        if (Phi[i] == -1) { 
            PLCP[i] = 0; 
            continue; 
        }
        while (s[i + L] == s[Phi[i] + L]) 
            L++;

        PLCP[i] = L;
        L = max(L-1, 0);                      
    }

    for (int i = 1; i < n; i++)                 
        LCP[i] = PLCP[SA[i]];
}

void score(string &s) {
    SCR[s.size()-1] = 1;

    int sum = 1;
    for (int i=s.size()-2; i>=0; i--) {
        if (LCP[i+1] < s.size()-SA[i]-1) {
            sum = 1;
        } else {
            sum++; 
        }
        SCR[i] = sum;
    }
}

int main() {
    string s = "abababa";
    s = string(s.rbegin(), s.rend()) +".";

    suffix_array(s);
    lcp(s);
    score(s);

    for(int i=0; i<s.size(); i++) {
        string ns = s.substr(SA[i], s.size()-SA[i]-1);
        ns = string(ns.rbegin(), ns.rend());
        cout << SCR[i] << "*" << ns.size() << " " << ns << endl;
    }
}

Most of this code (specially the suffix array and LCP implementations) I have been using for some years in contests. This version in special I adapted from this one I wrote some years ago.

这篇关于最大产品preFIX字符串的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

最大产品preFIX字符串 [英] Maximum product prefix string

问题描述

相关文章

C/C++最新文章

热门教程

热门工具

登录关闭

最大产品preFIX字符串 [英] Maximum product prefix string

问题描述

相关文章

C/C++最新文章

热门教程

热门工具

登录 关闭

登录关闭