Python Forum
unexpected output while parsing file - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: General Coding Help (https://python-forum.io/forum-8.html)
+--- Thread: unexpected output while parsing file (/thread-9747.html)



unexpected output while parsing file - anna - Apr-26-2018

import re
# Two parallel lists: one entry is appended per matching line, and the
# entries are paired up purely by position when printing.
dnlist = []
maclist = []

with open('pppoe.txt','r') as f:
     for line in f:
         # User-name lines such as "0200200003@ttml" carry the 10-digit dn.
         if '@ttml' in line:
             dn = re.findall(r'\d{10}',line)
             dnlist.append(dn)
         # Every "svc:1" line carries a MAC -- including the sessions whose
         # user-name line is a bare "-" -- so maclist can receive entries that
         # have no counterpart in dnlist.
         # NOTE(review): "[a-fA-AF0-9]" in the second octet looks like a typo
         # for "[a-fA-F0-9]"; as written it rejects uppercase B-E there.
         if 'svc:1' in line:
             mac = re.findall(r'mac:[a-fA-F0-9]{2}[:][a-fA-AF0-9]{2}[:][a-fA-F0-9]{2}[:][a-fA-F0-9]{2}[:][a-fA-F0-9]{2}[:][a-fA-F0-9]{2}',line)
             maclist.append(mac)
             #next
     # zip() pairs the i-th dn with the i-th mac and stops at the shorter list.
     for dn,mac in zip(dnlist,maclist):
         print('{} {}'.format(dn,mac))
Output:
['0200200003'] ['mac:80:26:89:c7:d4:43']
['0200200005'] ['mac:28:28:5d:f4:16:98']
['0200200006'] ['mac:6c:72:20:54:78:8d']
['0200200008'] ['mac:90:8d:78:73:08:79']
['0200200016'] ['mac:00:17:7c:78:74:56']
['0200200017'] ['mac:00:17:7c:77:af:d7']
expected output as below

Output:
['0200200003'] ['mac:00:17:7c:78:74:56']
['0200200005'] ['mac:00:17:7c:77:af:d7']
['0200200006'] ['mac:00:17:7c:77:ab:26']
['0200200008'] ['mac:80:26:89:ca:62:8b']
['0200200016'] ['mac:00:17:7c:7d:41:22']
['0200200017'] ['mac:00:17:7c:7d:60:7a']
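It is taking the dn (10-digit number) from the lines containing @ttml, but the mac is being taken starting from the first "svc:1" line of the file instead of from the line that follows each dn.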

File
[inline]-
===============================================================================
PPP sessions for service 1
===============================================================================
User-Name
Descr.
Up Time Type Termination IP/L2TP-Id/Interface-Id MC-Stdby
-------------------------------------------------------------------------------
-
svc:1 sap:lag-50:639.3619 mac:80:26:89:c7:d4:43 sid:5132
0d 00:00:02 oE local N/A
-
svc:1 sap:lag-50:91.3601 mac:28:28:5d:f4:16:98 sid:7733
0d 00:00:01 oE local N/A
-
svc:1 sap:lag-50:91.3601 mac:6c:72:20:54:78:8d sid:7734
0d 00:00:01 oE local N/A
-
svc:1 sap:lag-50:2554.2000 mac:90:8d:78:73:08:79 sid:3028
0d 00:00:00 oE local N/A
0200200003@ttml
svc:1 sap:lag-50:1775.2000 mac:00:17:7c:78:74:56 sid:5355
0d 00:48:53 oE local 49.248.184.41
0200200005@ttml
svc:1 sap:lag-50:1779.2000 mac:00:17:7c:77:af:d7 sid:1353
0d 08:59:48 oE local 49.248.196.53
0200200006@ttml
svc:1 sap:lag-50:2851.3739 mac:00:17:7c:77:ab:26 sid:2264
0d 10:30:58 oE local 49.248.93.206
00:42:0F:5C:02:42:0F:5C
0200200008@ttml
svc:1 sap:lag-50:79.3609 mac:80:26:89:ca:62:8b sid:3785
0d 00:11:03 oE local 114.143.116.235
82:26:89:FF:FE:CA:62:8B
0200200016@ttml
svc:1 sap:lag-50:1720.3620 mac:00:17:7c:7d:41:22 sid:4868
0d 07:40:53 oE local 49.248.177.226
0200200017@ttml
svc:1 sap:lag-50:1724.3624 mac:00:17:7c:7d:60:7a sid:6819
0d 09:21:16 oE local 49.248.53.11
[/inline]


RE: unexpected output while parsing file - anna - Apr-27-2018

If I delete the first few lines (everything before the first line that starts with xxxxxxx@ttml), it works perfectly. But the number of these lines is not constant every time. Is there any way to tackle this?

= Works well if I delete below lines =

===============================================================================
PPP sessions for service 1
===============================================================================
User-Name
Descr.
Up Time Type Termination IP/L2TP-Id/Interface-Id MC-Stdby
-------------------------------------------------------------------------------
-
svc:1 sap:lag-50:639.3619 mac:80:26:89:c7:d4:43 sid:5132
0d 00:00:02 oE local N/A
-
svc:1 sap:lag-50:91.3601 mac:28:28:5d:f4:16:98 sid:7733
0d 00:00:01 oE local N/A
-
svc:1 sap:lag-50:91.3601 mac:6c:72:20:54:78:8d sid:7734
0d 00:00:01 oE local N/A
-
svc:1 sap:lag-50:2554.2000 mac:90:8d:78:73:08:79 sid:3028
0d 00:00:00 oE local N/A


RE: unexpected output while parsing file - killerrex - Apr-28-2018

The problem is that you have more entries with "svc:1" than with "@ttml", but zip is hiding this from you, as it stops as soon as the shortest iterable is exhausted.

One solution can be to obtain directly the mapping as a dictionary:
import re

# Compiled once and reused for every line of the file.
_DN_RE = re.compile(r'\d{10}')
# "mac:" followed by six colon-separated pairs of hex digits (either case).
_MAC_RE = re.compile(r'mac:[a-fA-F0-9]{2}(?::[a-fA-F0-9]{2}){5}')


def map_dn_to_macs(lines):
    """Build a ``{dn: [mac, ...]}`` mapping from PPP session listing lines.

    A line containing ``@ttml`` supplies the 10-digit dn; the next line
    containing ``svc:1`` supplies the MAC(s) for that dn.  ``svc:1`` lines
    that are not preceded by a dn are ignored, which keeps every MAC attached
    to the right dn.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        dict mapping each dn string to the list of ``'mac:xx:..'`` strings
        found for it.  A dn seen several times accumulates all its MACs.
    """
    macdn = {}
    dn = None
    for line in lines:
        if '@ttml' in line:
            found = _DN_RE.search(line)
            # An "@ttml" line without a 10-digit number gives no usable key,
            # so treat it as "no dn pending" rather than raising IndexError.
            dn = found.group(0) if found else None

        if 'svc:1' in line and dn is not None:
            macdn.setdefault(dn, []).extend(_MAC_RE.findall(line))
            # The dn has been consumed; wait for the next "@ttml" line.
            dn = None
    return macdn


if __name__ == '__main__':
    with open('pppoe.txt', 'r') as f:
        macdn = map_dn_to_macs(f)

    for dn, macs in macdn.items():
        print('{} {}'.format(dn, macs))
This allows for several MACs with the same dn. (You can easily invert the logic to obtain a dictionary mapping one MAC to several dns.)

If you need to keep the two-list format for some reason, the same idea works: do not append to the lists until you have a valid dn:
import re

# Compiled once and reused for every line of the file.
_DN_RE = re.compile(r'\d{10}')
# "mac:" followed by six colon-separated pairs of hex digits (either case).
_MAC_RE = re.compile(r'mac:[a-fA-F0-9]{2}(?::[a-fA-F0-9]{2}){5}')


def pair_dn_with_mac(lines):
    """Collect dn and MAC matches as two lists that stay in step.

    Nothing is appended until a line containing ``@ttml`` has been seen, so
    ``svc:1`` lines that have no dn in front of them cannot shift the pairing.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        ``(dnlist, maclist)``: two equally long lists.  Each element is the
        ``re.findall`` result (a list of strings) for the dn line and for the
        ``svc:1`` line that followed it.
    """
    dnlist = []
    maclist = []
    dn = None
    for line in lines:
        if '@ttml' in line:
            dn = _DN_RE.findall(line)

        if 'svc:1' in line and dn is not None:
            dnlist.append(dn)
            maclist.append(_MAC_RE.findall(line))
            # The dn has been consumed; wait for the next "@ttml" line.
            dn = None
    return dnlist, maclist


if __name__ == '__main__':
    with open('pppoe.txt', 'r') as f:
        dnlist, maclist = pair_dn_with_mac(f)

    for dn, mac in zip(dnlist, maclist):
        print('{} {}'.format(dn, mac))
Finally, if you want to record all the MACs, you can just add a '-' placeholder to the dn list for the sessions that have no dn:
import re

# Compiled once and reused for every line of the file.
_DN_RE = re.compile(r'\d{10}')
# "mac:" followed by six colon-separated pairs of hex digits (either case).
_MAC_RE = re.compile(r'mac:[a-fA-F0-9]{2}(?::[a-fA-F0-9]{2}){5}')


def collect_dn_and_mac(lines):
    """Collect every MAC, using a dashed placeholder where no dn is given.

    A line that is just ``-`` stands where a user name would be, so it
    contributes ``['----------']`` to the dn list.  That keeps the dn list the
    same length as the MAC list when each ``svc:1`` line is preceded by either
    a ``-`` line or an ``@ttml`` line.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        ``(dnlist, maclist)``: each element is a list of strings (the
        ``re.findall`` result, or the one-element placeholder list).
    """
    dnlist = []
    maclist = []
    for line in lines:
        # Only a lone "-" counts; the long "-----" ruler line does not.
        if line.strip() == '-':
            dnlist.append(['-' * 10])

        if '@ttml' in line:
            dnlist.append(_DN_RE.findall(line))

        if 'svc:1' in line:
            maclist.append(_MAC_RE.findall(line))
    return dnlist, maclist


if __name__ == '__main__':
    with open('pppoe.txt', 'r') as f:
        dnlist, maclist = collect_dn_and_mac(f)

    for dn, mac in zip(dnlist, maclist):
        print('{} {}'.format(dn, mac))



RE: unexpected output while parsing file - anna - Apr-28-2018

Thanks, it's working perfectly... I have modified the code to also look up the MAC OUI (vendor), and it is working.

import re

# Compiled once and reused for every line of the file.
_DN_RE = re.compile(r'\d{10}')
# "mac:" followed by six colon-separated pairs of hex digits (either case).
_MAC_RE = re.compile(r'mac:[a-fA-F0-9]{2}(?::[a-fA-F0-9]{2}){5}')


def macvendor(mac_add):
    """Return what the parser's ``get_manuf`` reports for *mac_add*.

    Relies on the module-level parser ``p`` that the ``__main__`` section
    creates before the first call.
    """
    vendor = p.get_manuf(mac_add)
    return vendor


def parse_sessions(lines):
    """Collect dn and MAC matches from PPP session listing lines.

    A lone ``-`` line contributes the placeholder ``['----------']`` to the dn
    list, an ``@ttml`` line contributes its 10-digit matches, and every
    ``svc:1`` line contributes its ``'mac:xx:..'`` matches to the MAC list.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        ``(dnlist, maclist)``: two lists whose elements are lists of strings.
    """
    dnlist = []
    maclist = []
    for line in lines:
        if line.strip() == '-':
            dnlist.append(['-' * 10])

        if '@ttml' in line:
            dnlist.append(_DN_RE.findall(line))

        if 'svc:1' in line:
            maclist.append(_MAC_RE.findall(line))
    return dnlist, maclist


def bare_mac(matches):
    """Return the first ``'mac:xx:..'`` match without its ``mac:`` prefix.

    Returns an empty string when *matches* is empty, i.e. when a ``svc:1``
    line held no recognisable MAC, instead of raising IndexError.
    """
    if not matches:
        return ''
    return matches[0][len('mac:'):]


if __name__ == "__main__":
    # Imported here so the helpers above stay importable without manuf.
    from manuf import manuf

    with open('bulkdn.txt', 'r') as f:
        dnlist, maclist = parse_sessions(f)

    # Module-level on purpose: macvendor() looks this name up as a global.
    p = manuf.MacParser(update=True)
    for i, (dn, mac) in enumerate(zip(dnlist, maclist)):
        mac = bare_mac(mac)
        # No MAC on the line means there is nothing to look up.
        vendor = macvendor(mac) if mac else None
        print('{} {} {} {}'.format(i, ", ".join(dn), mac, vendor))
Output:
24128 7120801937 00:26:5a:44:0a:21 D-Link
24129 7120801943 00:17:7c:80:56:83 Smartlin
24130 7120801963 00:17:7c:61:65:aa Smartlin
24131 7120801964 00:17:7c:6c:5a:28 Smartlin
24132 7120801987 08:35:71:ff:2c:3a Caswell
24133 7120801995 00:02:b6:44:f2:b5 Acrosser
24134 7120801996 00:17:7c:70:54:1e Smartlin
24135 7120802002 00:17:7c:75:c6:06 Smartlin
24136 7120802009 00:17:7c:78:8c:18 Smartlin
24137 7120802021 00:17:7c:75:6d:7e Smartlin
24138 7120802027 00:17:7c:80:4b:e3 Smartlin
24139 7120802036 10:62:eb:6a:b3:b6 D-LinkIn
24140 7120802046 00:17:7c:77:af:cf Smartlin
24141 7120802048 00:17:7c:6e:10:25 Smartlin