#!/usr/bin/env python
# regex automated generation
# by sandro gauci
# do not distribute yet
import re
def regexbrute(teststring,initre='',groups=True):
    r"""Yield each one-atom extension of ``initre`` that matches ``teststring``.

    Every candidate is ``initre`` followed by exactly one atom.  The atoms
    tried, in order, are: the ASCII lowercase letters, the ASCII uppercase
    letters, the digits, the whitespace characters and -- only when
    ``groups`` is true -- ``\.``, ``-``, ``\d``, ``[a-fA-F0-9]``, ``\w``
    and ``\s``.

    teststring -- the string each candidate is matched against (with
                  ``re.match``, i.e. anchored at the start only).
    initre     -- the regex prefix that is being extended.
    groups     -- when true, also try the escaped/class atoms listed above.

    This is a generator; candidates are produced lazily in the order above.
    """
    import string
    # The ascii_* constants exist on both Python 2 and Python 3, whereas
    # string.lowercase / string.uppercase were removed in Python 3.
    atoms = list(string.ascii_lowercase)
    atoms += list(string.ascii_uppercase)
    atoms += list(string.digits)
    atoms += list(string.whitespace)
    if groups:
        # Raw strings keep the backslashes intact for the regex engine.
        atoms += [r'\.', '-']
        atoms += [r'\d', '[a-fA-F0-9]', r'\w', r'\s']
    for atom in atoms:
        candidate = initre + atom
        if re.match(candidate, teststring) is not None:
            yield candidate
            
            
def regextest(regex,strings):
    """Return True when ``regex`` matches at the start of every string.

    regex   -- pattern handed to ``re.match`` for each string.
    strings -- iterable of strings to check.

    Returns False as soon as one string fails to match; an empty
    ``strings`` yields True.
    """
    return all(re.match(regex, candidate) is not None for candidate in strings)

def findrepetition(regex,strings):
    """Append a ``{n}`` or ``{min,max}`` quantifier to ``regex`` when it fits.

    The quantifier is appended textually, so it applies to whatever the
    last atom of ``regex`` is.

    regex   -- the pattern to extend.
    strings -- iterable of strings the pattern is matched against
               (``re.match``, anchored at the start only).

    Returns:
      * ``regex`` unchanged when ``strings`` is empty or when ``regex{2}``
        does not match every string;
      * ``regex{min}`` when ``min`` is the largest count matched by every
        string and no string matches ``min + 1`` repetitions;
      * ``regex{min,max}`` when at least one string also matches more
        repetitions, ``max`` being the last count (increasing one at a
        time from ``min``) still matched by some string.
    """
    # Materialize once: the input is scanned many times, and an empty input
    # would otherwise keep the "every string matches" search going forever.
    strings = list(strings)
    if not strings:
        return regex

    def matches(count, text):
        return re.match(regex + '{%s}' % count, text) is not None

    # Largest repetition count that ALL strings satisfy.
    minimum = 1
    while all(matches(minimum + 1, text) for text in strings):
        minimum += 1
    if minimum == 1:
        return regex

    # Largest repetition count that AT LEAST ONE string satisfies.
    maximum = minimum
    while any(matches(maximum + 1, text) for text in strings):
        maximum += 1

    if maximum > minimum:
        return regex + '{%s,%s}' % (minimum, maximum)
    return regex + '{%s}' % (minimum)

def generateregex(stuff,groups=True):
    """Build a regex that matches the start of every string in ``stuff``.

    The regex is grown one atom at a time: candidate extensions are
    produced by ``regexbrute`` from the first string, the first candidate
    accepted by ``regextest`` for all strings is kept, and
    ``findrepetition`` is then given the chance to add a quantifier.
    Growth stops when no candidate extension matches every string.

    stuff  -- the sample strings.
    groups -- forwarded to ``regexbrute``.

    Returns the generated pattern, or ``''`` when ``stuff`` is empty.
    """
    strings = list(stuff)
    if not strings:
        return ''
    first = strings[0]
    currentregex = ''
    while True:
        candidates = regexbrute(first, currentregex, groups=groups)
        # next() with a default works on Python 2.6+ and Python 3, unlike
        # the Python-2-only generator method .next().
        extended = next(
            (candidate for candidate in candidates
             if regextest(candidate, strings)),
            None)
        if extended is None:
            # No one-atom extension matches all strings: we are done.
            break
        currentregex = findrepetition(extended, strings)
    return currentregex

def cleanact(stuff):
    """Drop entries whose leading token is not the dominant one.

    For every entry, its first whitespace-separated token is taken and
    credited with the number of entries that start with that token (the
    credit accumulates when several entries share a token).  Tokens whose
    total differs from the highest total are "losers"; every entry that
    starts with a loser token is removed.

    stuff -- list of strings.  It is modified in place.

    Returns the same list object, after filtering.  An empty list is
    returned unchanged.  Raises IndexError if an entry is empty or
    consists only of whitespace (it has no leading token).
    """
    if not stuff:
        return stuff
    snapshot = stuff[:]
    candidates = dict()
    for thing in snapshot:
        token = thing.split()[0]
        hits = sum(1 for other in snapshot if other.startswith(token))
        candidates[token] = candidates.get(token, 0) + hits
    maxmatch = max(candidates.values())
    losers = tuple(token for token in candidates
                   if candidates[token] != maxmatch)
    # Filter from the snapshot and assign back through a slice: removing
    # from the list while iterating over it would skip the element that
    # follows each removal.  str.startswith accepts a tuple of prefixes
    # (an empty tuple never matches).
    stuff[:] = [thing for thing in snapshot if not thing.startswith(losers)]
    return stuff

def getbestmatch(stuff):
    """Return the regex generated from ``stuff`` once it has been filtered.

    ``stuff`` is first passed through ``cleanact``; the result is handed to
    ``generateregex`` with ``groups=False``.
    """
    # NOTE(review): generateregex is imported from a module named ``regen``;
    # presumably that is this very file -- confirm.
    from regen import generateregex
    cleaned = cleanact(stuff)
    return generateregex(cleaned, groups=False)


if __name__ == "__main__":
    # Demo: derive the pattern of the "to" tags generated by Asterisk
    # from the sample values below.
    stuff = \
"""as79dc6a19
as71abb6a5
as1539e695
as2f5a4a5c
as737b2d15
as522cf696
as3c28c041
as18f51e5d
as30c67500
as143d9f50
as56961a2b
as4fa1e751
as79e8cfed
as0d71fedd
as4877352b
as25e96203
as316bc91f
as70f90a8b
as0e210b64
as059414ff
as76d576ba
as552be295
as71fe09e4
as629ac20c
as7e38fdd3
as45d033e9
as3a50e3fd
as5d0f1be9
as719d50e0
as1182ddf2
as6aa1839d
as1f24be2b
as162ff2b4
as2f17bf61
as1beee964
as14ecdccd
as265d1496
as26c2f484
as201a90c8
as166fb461
as7b3bdcea
as1a39fc3a
as494a8065
as72ec8ee9
as033518df
as428039ab
as24d181bc
as3079f6da
as0444e4cc
as75e5e132
as66515468
as5a3df9fd
as4b49c60a
as0cfbcf3a
as781231da
as2a59181b
as26c345af
as0fe9f831
as537ffa69
as13ae012e
as0c3f869a
as14650226
as58ab1e9b
as75c397e3""".splitlines()
    # Parenthesized single-argument form works both as the Python 2 print
    # statement and as the Python 3 print function.
    print(generateregex(stuff))
