Python Forum
Imitating C's strtol() function.
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Imitating C's strtol() function.
#1
An exercise to imitate more or less the C function strtol that parses an integer at a given position in a string with a given base in the range 0-36.
from functools import lru_cache
import re
__version__ = '2019.06.06'

# Prefix probe used when the caller passes base 0: optional whitespace,
# an optional sign, then a "0".  Group 1 captures a following "x" (either
# case, because of the inline (?i) flag) when one is present.
re_base_zero = re.compile(r"(?i)\s*[\+\-]?0(x)?")

@lru_cache(10)
def strtol_re(base):
    """Return a compiled regex matching an integer literal written in ``base``.

    The pattern accepts optional leading whitespace, an optional sign, an
    optional ``0x``/``0X`` prefix when ``base`` is 16, and then at least one
    digit that is valid in ``base`` (letter digits in either case).
    Compiled patterns are memoised through ``lru_cache``.

    Arguments:
        base : concrete integer base between 2 and 36 inclusive.

    Returns:
        A compiled pattern; ``match()`` yields None when no digit is found.

    Errors:
        ValueError is raised if base is outside 0-36, or if it is 0 or 1
        (base 0 means "auto-detect" and has to be resolved by the caller).
    """
    if not (0 <= base <= 36):
        raise ValueError('Expected base between 0 and 36.')
    if base < 2:
        # Base 0 used to build the invalid range "[0-/]" (re.error) and
        # base 1 a pattern that int() can never convert.
        raise ValueError('Expected a concrete base between 2 and 36.')
    if base <= 10:
        digits = "0-{}".format(chr(ord('0') + base - 1))
    else:
        # Base b has b - 10 letter digits, so the highest one sits
        # b - 11 places after 'A' (base 11 -> 'A', base 36 -> 'Z').
        last = base - 11
        # Both cases are spelled out instead of using (?i): in Unicode
        # mode, case-insensitive matching also lets non-ASCII letters
        # such as U+212A (Kelvin sign) match, which int() then rejects.
        digits = "0-9A-{}a-{}".format(
            chr(ord('A') + last), chr(ord('a') + last))
    prefix = r"(?:0[xX])?" if base == 16 else ""
    # "+" rather than "*": a sign or "0x" with no digit after it is not a
    # number.  For "0xg" the regex backtracks and matches just "0".
    return re.compile(r"\s*[+-]?" + prefix + "[" + digits + "]+")

class Strtol:
    """Small object to store result of conversion of string to int

    Arguments:
        s       : string to read
        base=10 : integer base, either 0 or between 2 and 36 inclusive.
                  With base 0 the base is read from the text itself:
                  a leading "0x" selects 16, any other leading "0"
                  selects 8, and everything else selects 10.
        pos=0   : position where to read in the string.

    Strtol members:
        value   : an integer value parsed in the string.
        string  : the string that was read.
        pos     : the position where the integer was parsed.
        endpos  : the position in the string after the integer.

    Errors:
        If no valid conversion could be performed, ValueError is raised.
        No member is set on the instance in that case.

    see also:
        the linux manual of strtol
    """
    __slots__ = ('value', 'string', 'pos', 'endpos')

    def __init__(self, s, base=10, pos=0):
        if base == 0:
            # Auto-detect the base from the literal's prefix.
            prefix = re_base_zero.match(s, pos=pos)
            if prefix is None:
                base = 10
            else:
                base = 16 if prefix.group(1) else 8
        m = strtol_re(base).match(s, pos=pos)
        if m is None:
            raise ValueError('Cannot convert to int')
        try:
            value = int(m.group(0), base)
        except ValueError as err:
            # The pattern can accept text int() refuses (for instance a
            # bare sign); report that as the documented conversion
            # failure and keep int()'s own message as the cause.
            raise ValueError('Cannot convert to int') from err
        self.value = value
        self.string = s
        self.pos = pos
        self.endpos = m.end()

if __name__ == '__main__':
    import unittest

    class TestStrtol(unittest.TestCase):
        # Every case parses a sample string, then checks the parsed value
        # and the position at which parsing stopped.

        def test_base_10(self):
            # Default base, starting in the middle of the string.
            text = 'foo-324bar'
            parsed = Strtol(text, pos=3)
            self.assertEqual(parsed.value, -324)
            self.assertEqual(parsed.endpos, 7)
            self.assertEqual(parsed.pos, 3)
            self.assertIs(parsed.string, text)

        def test_base_16(self):
            # Hex digits in both cases, leading blanks, no prefix.
            text = '  324Bbar'
            expected = int('324BBA', 16)
            parsed = Strtol(text, base=16)
            self.assertEqual(parsed.value, expected)
            self.assertEqual(text[parsed.endpos:], 'r')

        def test_base_16_0x(self):
            # Explicit base 16 together with a sign and a 0x prefix.
            text = '  -0x324Bbar'
            expected = -int('324BBA', 16)
            parsed = Strtol(text, base=16)
            self.assertEqual(parsed.value, expected)
            self.assertEqual(text[parsed.endpos:], 'r')

        def test_base_0_0x(self):
            # Base 0 with a 0x prefix is read as hexadecimal.
            text = '  -0x324Bbar'
            expected = -int('324BBA', 16)
            parsed = Strtol(text, base=0)
            self.assertEqual(parsed.value, expected)
            self.assertEqual(text[parsed.endpos:], 'r')

        def test_base_0(self):
            # Base 0 without a leading zero is read as decimal.
            text = '  -324Bbar'
            parsed = Strtol(text, base=0)
            self.assertEqual(parsed.value, -324)
            self.assertEqual(text[parsed.endpos:], 'Bbar')

        def test_base_0_octal(self):
            # Base 0 with a leading zero is read as octal.
            text = '  -0324Bbar'
            expected = -int("324", 8)
            parsed = Strtol(text, base=0)
            self.assertEqual(parsed.value, expected)
            self.assertEqual(text[parsed.endpos:], 'Bbar')

        def test_base_20(self):
            # Letter digits beyond F, in mixed case.
            text = '  -0324BgGar'
            expected = -int("324BgGa", 20)
            parsed = Strtol(text, base=20)
            self.assertEqual(parsed.value, expected)
            self.assertEqual(text[parsed.endpos:], 'r')

        def test_empty_string(self):
            # Nothing to convert: the constructor has to raise.
            with self.assertRaises(ValueError):
                Strtol('')

    unittest.main()
Reply
#2
I got an error for the empty string:
I would expect 0.

# Parse the empty string with the default base (10) and print the value.
x = ''
s = Strtol(x)
print(s.value)
Output:
Traceback (most recent call last):
  File "./zz.py", line 61, in <module>
    s = Strtol(x)
  File "./zz.py", line 52, in __init__
    self.value = int(m.group(0), base)
ValueError: invalid literal for int() with base 10: ''
Reply
#3
heiner55 Wrote:I got an error for the empty string:

Well, in C, the function strtol does return 0 for an empty input string as the following C code demonstrates
#include <stdio.h>
#include <stdlib.h>

/* Print the value strtol() returns for an empty string in base 10. */
int main(int argc, char*argv[]){
    long parsed = strtol("", NULL, 10);

    printf("%ld\n", parsed);
    return 0;
}
However, the documentation says
Linux man page Wrote:The implementation may also set errno to EINVAL in case no conversion was performed (no digits seen, and 0 returned).
In Python, we don't need to set errno; instead, we can raise exceptions. In this case, it seems to me that the most Pythonic solution is to raise an exception. We could perhaps specialize the exception by raising an appropriate subtype of ValueError. What do you think about it?
Reply
#4
That is ok.
Maybe you could add something like that to your doc:

If no valid conversion could be performed, ValueError is raised.
Reply
#5
heiner55 Wrote:Maybe you could add something like that to your doc
The code has been updated to include that.
Reply
#6
Thanks
Reply


Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020