Python Forum

Full Version: How can I speed up my Cython module?
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Python 3.7.5rc1, Cython 0.29.13

Hello, I created an ndarray in which, while reading a file line by line, I accumulate the first and last transaction price for each client:
dtype=np.dtype([('client','U13'), ('day_begin','u4'), ('day_end','u4'), ('price_begin','f4'), ('price_end','f4')])   
return np.empty(0, dtype=dtype)
Below is my Cython module. It receives the lines of the CSV file one at a time, splits each line into fields, and converts the fields to the required types in order to accumulate the transaction statistics:
import numpy as np
cimport numpy as np

cpdef process_string(str file_str, str mask_client, str code, int end_day_int, np.ndarray periods_clients, dict line_by_client):
    """Process one ';'-separated line and fold it into the per-client array.

    Fields used (0-based): 1 = date, 3 = client, 5 = price.

    Parameters:
        file_str: one raw line of the input file; an empty value is skipped.
        mask_client: a line is kept when this substring occurs in the client.
        code: a line is also kept when the client equals this value exactly.
        end_day_int: largest date (as produced by ``_get_date_from_str``)
            that is still accumulated.
        periods_clients: structured array holding one record per client.
        line_by_client: maps a client to its row index in ``periods_clients``.

    Returns:
        A ``(flag, array)`` tuple. ``flag`` is True only when the date of the
        line is greater than ``end_day_int`` (presumably the caller's signal
        to stop reading -- confirm against the caller); ``array`` is the
        possibly re-allocated ``periods_clients``.
    """
    # Nothing to do for an empty line.
    if not file_str:
        return False, periods_clients

    cdef list columns = file_str.split(';')
    cdef str client = columns[3]

    # Keep the line only for clients matching the mask or the explicit code.
    cdef bint is_wanted = (mask_client in client) or (client == code)
    if not is_wanted:
        return False, periods_clients

    cdef int trade_day = _get_date_from_str(columns[1])
    if trade_day > end_day_int:
        # Date is past the requested end day: report it through the flag.
        return True, periods_clients

    cdef double trade_price = _convert_price_to_float(columns[5])
    return False, _add_data_to_array(client, trade_day, trade_price, periods_clients, line_by_client)

cdef double _convert_price_to_float(str price) except? -1:
    """Parse a price written with a decimal comma (or point) into a C double.

    The builtin ``float`` is used instead of ``np.float64``: the result is
    converted to a C double anyway, so building a NumPy scalar object first
    is pure overhead on every line.

    The ``except? -1`` clause lets a parse failure (``ValueError``) propagate
    to the caller. Without it, Cython 0.29 only prints the exception and the
    function silently returns 0.0 for a malformed price.
    """
    return float(price.replace(',', '.'))
 
cdef int _get_date_from_str(str date_str) except? -1:
    """Turn a date string into the integer ``YYYYMMDD``.

    The day is read from characters 0-1, the month from characters 3-4 and
    the year from characters 6-9 (e.g. ``"15.03.2019"`` -> ``20190315``);
    the separator characters at positions 2 and 5 are not inspected.

    The ``except? -1`` clause lets a ``ValueError`` from a malformed field
    propagate. Without it, Cython 0.29 only prints the exception and the
    function silently returns 0.
    """
    # Same evaluation order as the original expression: year, month, day.
    cdef int year = int(date_str[6:10])
    cdef int month = int(date_str[3:5])
    cdef int day = int(date_str[0:2])
    return 10000 * year + 100 * month + day
    
cdef _add_data_to_array(str client, int current_date, double current_price, np.ndarray array, dict line_by_client):
    """Record one transaction for ``client`` and return the (possibly new) array.

    For a client already present in ``line_by_client`` only the ``day_end``
    and ``price_end`` fields of its row are overwritten, in place. For a new
    client a row is appended with the begin and end fields both set to the
    current date/price, and the row index is stored in ``line_by_client``.

    Parameters:
        client: client identifier, used as the dictionary key and stored in
            the ``client`` field.
        current_date: date of the transaction.
        current_price: price of the transaction.
        array: structured array with the fields ``client``, ``day_begin``,
            ``day_end``, ``price_begin`` and ``price_end``.
        line_by_client: maps a client to its row index; updated in place.

    Returns:
        ``array`` itself when an existing row was updated, otherwise the new
        array produced by ``np.append``.
    """
    # ``get`` (not ``setdefault``) so that no placeholder ``None`` entry is
    # left in the index if creating the row below fails.
    index_line = line_by_client.get(client)

    if index_line is not None:
        # Field-first indexing writes straight into the field view and avoids
        # creating a temporary record object for the row.
        array['day_end'][index_line] = current_date
        array['price_end'][index_line] = current_price
        return array

    new_array = np.zeros(1, array.dtype)
    new_array['client'] = client
    new_array['day_begin'] = current_date
    new_array['price_begin'] = current_price
    new_array['day_end'] = current_date
    new_array['price_end'] = current_price

    # NOTE: np.append copies the whole array, so every new client costs
    # O(len(array)). Growing a pre-allocated buffer would avoid this but
    # requires changing how callers own the array.
    array = np.append(array, new_array)
    line_by_client[client] = len(array) - 1
    return array
The Cython version runs at almost the same speed as the pure Python one. How can I speed up this module?