Help with python code to search string in one file & replace with line in other file

mforthman · (This post was last modified: Dec-19-2017, 03:10 PM by mforthman.)

I was getting some "No such directory" errors and was able to figure out how to modify the code so it would work (on a macOS system). Code below. Running it replaces only some of the targeted headers; specifically, it seems to replace only those headers that contain Clavigralla and Anoplocnemis. I think I see why and will play with the script some more.

#!/usr/bin/env python

# Replace headers in the original file with headers from the header file, writing output to outputfile
# Larz60+
# from pathlib import Path
import os
import sys
import argparse


class SwapHeaders:
    """Replace '>' header lines of one file with lines from a header file.

    All three filenames are resolved against the ``data`` directory below
    the current working directory (absolute filenames are used as-is).

    For every line of the original file that starts with '>', a match key is
    derived (text after '>', cut at the last '.') and the first '>' line of
    the header file that contains that key is written instead.  All other
    lines, and headers without a replacement, are copied unchanged.

    The original file is opened by the constructor and stays open until
    ``close_files()`` is called; the instance can also be used in a ``with``
    statement, which closes it automatically.
    """

    def __init__(self, origfile=None, headerfile=None, outfile=None):
        """Resolve the file paths, load the header file and open the original.

        :param origfile: name of the file whose headers are to be replaced
        :param headerfile: name of the file holding the replacement headers
        :param outfile: name of the file to be written by make_new_file()
        :raises TypeError: if any of the three filenames is missing
        :raises IOError: if the header file or original file cannot be opened
        """
        for label, value in (('origfile', origfile),
                             ('headerfile', headerfile),
                             ('outfile', outfile)):
            if value is None:
                raise TypeError('%s is required' % label)

        # pathlib is deliberately not used: the script must also run on
        # Python 2.7, so paths are built with os.path instead.
        self.home = os.getcwd()
        # The trailing '' keeps the trailing separator this attribute had.
        self.data = os.path.join(self.home, 'data', '')
        self.original_file = os.path.join(self.data, origfile)
        self.header_file = os.path.join(self.data, headerfile)
        self.out_file = os.path.join(self.data, outfile)

        with open(self.header_file, 'r') as fh:
            self.header_data = fh.readlines()

        self.orig = open(self.original_file, 'r')
        # Kept for backward compatibility only; the output file is opened
        # (and closed) inside make_new_file().
        self.fo = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close_files()
        return False

    def close_files(self):
        """Close the original file opened by the constructor."""
        self.orig.close()

    def get_replacement_header(self, match):
        """Return the first '>' line of the header file that contains match.

        Matching is a plain substring test, so a key such as 'seq1' also
        matches a header containing 'seq10'.

        :param match: text to look for inside the header lines
        :return: the matching header line exactly as read, or None
        """
        return next(
            (line for line in self.header_data
             if line.startswith('>') and match in line),
            None)

    def read_orig_record(self):
        """Yield the remaining lines of the original file, one at a time."""
        for data in self.orig:
            yield data

    @staticmethod
    def _match_key(header_line):
        """Return the lookup key for a '>' line: text after '>' up to the last '.'."""
        key = header_line[1:].rstrip('\r\n')
        dot = key.rfind('.')
        # rfind() returns -1 (which is truthy) when there is no '.', so the
        # result must be compared with -1 rather than tested for truth.
        if dot != -1:
            key = key[:dot]
        return key

    def make_new_file(self):
        """Write the output file, swapping in replacement headers where found."""
        with open(self.out_file, 'w') as fo:
            for orig in self.read_orig_record():
                new = None
                if orig.startswith('>'):
                    match = self._match_key(orig)
                    # An empty key is contained in every header line, so it
                    # would always pick the first one; leave such lines alone.
                    if match:
                        new = self.get_replacement_header(match)
                if new is None:
                    fo.write(orig)
                    continue
                # The last line of the header file may lack a newline; add
                # one so the replacement does not fuse with the next record.
                if not new.endswith('\n'):
                    new += '\n'
                fo.write(new)


def main():
    """Parse the command line and run the header swap.

    Typical call:
        python SwapHeaders.py -i 'File1.txt' -b 'File2.txt' -o 'Fileout.txt'

    All three options are required; the filenames are handed to SwapHeaders,
    which looks for them in the 'data' directory below the current working
    directory.
    """
    parser = argparse.ArgumentParser(
        description="Replace '>' header lines of a file with matching "
                    "headers taken from a second file.")
    parser.add_argument("-i", "--ifile",
                        dest='original_filename',
                        help="Filename where headers are to be replaced",
                        action="store",
                        required=True)

    parser.add_argument("-b", "--bfile",
                        dest='replace_original_filename',
                        help="Filename containing the replacement headers",
                        action="store",
                        required=True)

    parser.add_argument("-o", "--ofile",
                        dest='out_filename',
                        help="Output filename",
                        action="store",
                        required=True)

    args = parser.parse_args()

    sh = SwapHeaders(origfile=args.original_filename,
                     headerfile=args.replace_original_filename,
                     outfile=args.out_filename)
    try:
        sh.make_new_file()
    finally:
        # Close the input file even if writing the output fails.
        sh.close_files()


# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

If I change line 70 'x = match.rfind('.')' to 'x = match.rfind('seq1')', that certainly will select the other targeted headers, but it will include headers that have, e.g., 'seq1_A_' and 'seq1_B_' which I do not want to include. Is there a way to get the match.rfind search term to exclude these instances or to just include seq1_[some numerical digits]?

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	Cannot get cmd to print Python file	Schauster	11	565	May-16-2024, 04:40 PM Last Post: xMaxrayx
	Matching string from a file	tester_V	5	559	Mar-05-2024, 05:46 AM Last Post: Danishhafeez
	Python openyxl not updating Excel file	MrBean12	1	429	Mar-03-2024, 12:16 AM Last Post: MrBean12
	Python logging RotatingFileHandler writes to random file after the first log rotation	rawatg	0	484	Feb-15-2024, 11:15 AM Last Post: rawatg
	Unable to understand the meaning of the line of code.	jahuja73	0	381	Jan-23-2024, 05:09 AM Last Post: jahuja73
	connect sql by python using txt. file	dawid294	2	537	Jan-12-2024, 08:54 PM Last Post: deanhystad
	Writing a Linear Search algorithm - malformed string representation	Drone4four	10	1,157	Jan-10-2024, 08:39 AM Last Post: gulshan212
	file open "file not found error"	shanoger	8	1,364	Dec-14-2023, 08:03 AM Last Post: shanoger
	python Read each xlsx file and write it into csv with pipe delimiter	mg24	4	1,701	Nov-09-2023, 10:56 AM Last Post: mg24
	Search Excel File with a list of values	huzzug	4	1,357	Nov-03-2023, 05:35 PM Last Post: huzzug

Help with python code to search string in one file & replace with line in other file

User Panel Messages

Announcements