Here is a snippet of the file.
Output:
number|assigned_to|business_duration|description|priority|sys_created_by|opened_at|u_pending_closed_on|resolved_by|sys_created_on
INC3270235|Most of these are blank|0|Error messages go here|3|700000120|5/6/17 3:20|5/6/17 15:20
INC3271723|Most of these are blank|0|Error messages go here|3|700000120|5/7/17 9:43|5/7/17 9:43
INC3279067|Most of these are blank|0|Error messages go here|3|700000120|5/8/17 9:03|5/8/17 9:03
INC3319147|Most of these are blank|0|Error messages go here|3|700000120|5/11/17 1:24|5/11/17 1:24
INC3331818|Most of these are blank|0|Error messages go here|3|700000120|5/11/17 3:21|5/11/17 15:21
INC3331966|Most of these are blank|0|Error messages go here|3|700000120|5/11/17 3:31|5/11/17 15:31
INC3335862|Most of these are blank|0|Error messages go here|3|700000120|5/12/17 6:17|5/12/17 6:17
INC3344128|Most of these are blank|0|Error messages go here|3|700000120|5/12/17 3:11|5/12/17 15:11
INC3368733|Most of these are blank|0|Error messages go here|3|700000120|5/16/17 1:24|5/16/17 1:24
It is the "sys_created_on" date that I need to use as the filter.
Here is the script I need to modify in order to get this to happen.
I did not write this, but I'm being asked to learn how to support and improve it.
This is my first experience with Python, so I need a good kick start.
Note that the script will not produce output unless the raw data is pipe-delimited (|).
The reason is that the actual descriptions (error texts) can contain any number of commas.
Generally the descriptions are IO Errors written into the tickets.
This also explains why there is so much clean up of the data in the script.
Also if you see ways to improve this script I'm open to suggestions.
At this time I can only hope to someday be able to write code in a very pythonic way.
#!/usr/bin/python
# IMPORTED LIBRARIES
import csv
import re

# FILE PATHS
raw_data = "ESC_Incidents.csv"                      # pipe-delimited ServiceNow export
servicenow_analysis_output = "analysis_output.txt"  # report destination (never written to in this script)

# OPEN FILES
data = open(raw_data, "r")                          # raw data, read-only
output = open(servicenow_analysis_output, "w")      # created/truncated for output

# DATA STRUCTURE
# Column title -> list of every value seen in that column.
# NOTE(review): the key here is 'sys_created' while the export header reads
# 'sys_created_by' — confirm which name downstream consumers expect.
parsedData = {
    column: []
    for column in (
        'number', 'assigned_to', 'business_duration', 'description',
        'priority', 'sys_created', 'opened_at', 'u_pending_closed_on',
        'resolved_by', 'sys_created_on',
    )
}
#FUNCTIONS
#this function will parse and store the data into the data structure constructed prior
def parseTheData(fd, parsedData):
    """Parse a pipe-delimited ticket export into the column dictionary.

    fd         -- open file-like object for the raw export; read in full.
    parsedData -- dict mapping each of the 10 column names to a list; each
                  field is lowercased and appended in round-robin order.
    Returns the same parsedData dict.
    """
    # Round-robin column order; must match the export's header order.
    columns = ('number', 'assigned_to', 'business_duration', 'description',
               'priority', 'sys_created', 'opened_at', 'u_pending_closed_on',
               'resolved_by', 'sys_created_on')
    text = fd.read()
    # An empty field shows up as "||"; mark it explicitly so the
    # round-robin assignment below stays aligned.
    text = re.sub(r"\|\|", "|NULL|", text)
    # Rows are separated only by newlines, so insert an extra pipe ahead of
    # each "INC<number>|<letters>" row start to split rows apart.
    # (Fixed the original [a-zA-z] character-class typo -> [a-zA-Z];
    # the old range also matched [ \ ] ^ _ and backtick.)
    text = re.sub(r"INC[0-9]*\|[a-zA-Z]+", r"|\g<0>", text)
    # Lowercase every field and deal it into its column.  The previous
    # version also built a "cleaned" copy of the fields that was never
    # used; that dead code is removed here.
    for index, value in enumerate(text.split("|")):
        parsedData[columns[index % len(columns)]].append(value.lower())
    return parsedData
#This function strips commas and line breaks out of the sys_created_on values (not the descriptions)
def cleanTheData(ds):
    """Normalize the sys_created_on column of the data structure.

    Replaces runs of commas and any CR/LF characters with a single space
    in every 'sys_created_on' value, then returns the (mutated) dict.

    Bug fix: the original pattern ended with an empty alternative
    (',+|\\r|\\n|'), which matches the empty string at every position and
    therefore injected a space between every pair of characters; the
    trailing '|' is removed here.
    """
    cleaned = []
    for value in ds['sys_created_on']:
        cleaned.append(re.sub(r',+|\r|\n', ' ', value))
    ds['sys_created_on'] = cleaned
    return ds
def findContent(ds):
    """Tally error messages per database platform from ticket descriptions.

    Scans every value in ds["description"] for an embedded
    ""responsemessages"" error text and a ""resources"" database-platform
    entry, and returns a nested dict:
        {database_platform: {error_message: occurrence_count}}

    Fixes from review:
    * a first-seen platform used to be stored as the int 1 and then
      immediately indexed (dbErrors[dbPlatform][dbPlatform] = 1), which
      raises TypeError; it is now seeded with {errMsg: 1}, matching the
      intent of the original "dbErrors[db][err] = count" comment;
    * the leftover debug print() of every match object is removed.
    """
    dbErrors = {}
    for message in ds["description"]:
        # Pull the error text out of the response payload, if present.
        found = re.search(
            r'\"\"responsemessages\"\":\[{\"+message\"+:\"+[\w+ ,.:;-]+\"+',
            message)
        if not found:
            continue
        # Drop markup/punctuation, keeping only the words of the message.
        errMsg = ' '.join(re.sub(r'\W+', ' ', found.group(0)).split(' ')[3:])
        # Pull the database platform the ticket was raised against.
        db = re.search(r'\"+resources\"+\:\[\"+[\w+ ]+\"+\]', message)
        if not db:
            continue
        dbPlatform = ' '.join(re.sub(r'\W+', ' ', db.group(0)).split(' ')[2:])
        # dbErrors[database platform][error message] = count
        perPlatform = dbErrors.setdefault(dbPlatform, {})
        perPlatform[errMsg] = perPlatform.get(errMsg, 0) + 1
    return dbErrors
# MAIN LOGIC
# Parse the raw export, clean the created-on timestamps, then summarize
# error counts per database platform.
parsedData = cleanTheData(parseTheData(data, parsedData))
dbErrors = findContent(parsedData)
print(dbErrors)
# CLOSE THE FILES
data.close()
output.close()