Wednesday 7 February 2018

Converting a number range to a regex for searching with grep on the Linux command line

I often have to grep for filenames that contain a number within a specific range in order to gather statistics from many log files. I could not find a straightforward existing way to do this, so I wrote this Python function to convert a number range into a regex for grep. I hope it is helpful for others too.
       
def get_highest_number(start_number, i):
    """Return start_number with its lowest ``i`` decimal digits replaced by 9s.

    For example ``get_highest_number(123, 1)`` is ``129`` and
    ``get_highest_number(123, 2)`` is ``199``.

    Args:
        start_number: A non-negative integer.
        i: How many low-order decimal digits to fill with 9.

    Returns:
        The largest integer that agrees with ``start_number`` on every digit
        above the lowest ``i`` digits.
    """
    # Pure integer arithmetic: ``/`` is float division on Python 3 and would
    # silently lose precision for large values.
    m = 10 ** i
    return (start_number // m + 1) * m - 1
    
def get_regex_from_number_range(start_number, end_number):
    """Build a regex matching every integer from start_number to end_number.

    The range is inclusive on both ends.  The returned pattern has the form
    ``_@(alt1|alt2|...)``: the literal prefix ``_@`` followed by a group of
    alternatives, each built from digits and digit classes.  The pattern is
    not anchored after the number, so callers that need an exact match must
    add their own trailing delimiter.
    NOTE(review): the ``_@`` prefix presumably mirrors the author's file
    naming scheme -- confirm before reusing elsewhere.

    The pattern relies on grouping and alternation, so use it with an
    extended-regex matcher (e.g. ``grep -E``).

    Args:
        start_number: Non-negative integer, lower bound of the range.
        end_number: Integer upper bound, must be >= start_number.

    Returns:
        The regex as a string, e.g. ``'_@(12[0-4]|125)'`` for 120..125.

    Raises:
        ValueError: If start_number is negative or greater than end_number.
    """
    if start_number < 0 or end_number < start_number:
        raise ValueError(
            "expected 0 <= start_number <= end_number, got %r and %r"
            % (start_number, end_number)
        )

    end_digits = str(end_number)
    width = len(end_digits)
    # Collected and joined at the end so the parentheses always balance,
    # whichever of the phases below contributes alternatives.
    alternatives = []

    # Phase 1: climb upwards.  At each level, cover from ``current`` to the
    # number obtained by filling its lowest ``level + 1`` digits with 9s, as
    # long as that block still fits below end_number.
    current = start_number
    level = 0
    while True:
        span = 10 ** (level + 1)
        block_end = current // span * span + span - 1
        if block_end > end_number:
            break
        digits = str(current)
        pivot = len(digits) - (level + 1)
        alternatives.append(
            digits[:pivot] + "[" + digits[pivot] + "-9]" + "[0-9]" * level
        )
        current = block_end + 1
        level += 1

    # Phase 2: descend towards end_number.  Whenever ``current < end_number``
    # here, both have the same number of digits and every digit of
    # ``current`` to the right of the first differing position is 0, so each
    # position is closed with one digit class followed by wildcards.
    digits = str(current)
    pos = 0
    while current < end_number:
        low = int(digits[pos])
        high = int(end_digits[pos])
        if high > low:
            trailing = width - (pos + 1)
            alternatives.append(
                end_digits[:pos]
                + "[" + digits[pos] + "-" + str(high - 1) + "]"
                + "[0-9]" * trailing
            )
            current += (high - low) * 10 ** trailing
            digits = str(current)
        pos += 1

    # end_number itself is not covered by the half-open classes above.
    if current == end_number:
        alternatives.append(end_digits)

    return "_@(" + "|".join(alternatives) + ")"


       
 

No comments:

Post a Comment