OK, check this out and get back to me. I think it's what you are looking for. It replaces everything from the matching '>' header up to the next '>' record.
It looks for the files in a directory named data, which is a sub-directory of wherever the code is. You may want to change this.
You can run it from the command line with a command that looks like:
# Replace header in bodyfile with header in header file, writing output to outputfile Larz60+
#
It looks for the files in a directory named data, which is a sub-directory of wherever the code is. You may want to change this.
You can run it from the command line with a command that looks like:
python WhateverYouCallIt.py -i File1.txt -b File2.txt -o Fileout.txt > data/results.txt
code:
# Replace header in bodyfile with header in header file, writing output to outputfile Larz60+
#
from pathlib import Path
import argparse


class SwapHeaders:
    """Replace '>'-delimited records in one file with matching records from another.

    Both input files are read as text made of records, where each record
    starts with a header line beginning with '>' and runs up to (but not
    including) the next '>' line.  For every record in the original file a
    match key is derived from its header (see ``_match_key``).  If some header
    line in the header file contains that key, the whole original record is
    replaced in the output by the first such record from the header file;
    otherwise the original record is copied through unchanged.

    The work is done at construction time: the header file is loaded and the
    output file is written by ``__init__``.

    Args:
        origfile: Name of the file whose records are to be replaced.
        headerfile: Name of the file supplying the replacement records.
        outfile: Name of the file to write.
        data_dir: Directory holding all three files.  Defaults to a ``data``
            sub-directory of the current working directory.
    """

    def __init__(self, origfile=None, headerfile=None, outfile=None,
                 data_dir=None):
        self.home = Path('.')
        self.data = self.home / 'data' if data_dir is None else Path(data_dir)
        self.original_file = self.data / origfile
        self.header_file = self.data / headerfile
        self.out_file = self.data / outfile

        # The header file is scanned once per original record, so keep it
        # in memory rather than re-reading it from disk each time.
        with self.header_file.open() as fh:
            self.new_data = fh.readlines()
        self.make_new_file()

    @staticmethod
    def _match_key(header_line):
        """Return the text used to look up *header_line* in the header file.

        The leading '>' and surrounding whitespace are dropped, then
        everything from the last '.' onward is removed (for example
        ``>name_X01.1_0_rc`` gives ``name_X01``).  A header without a '.'
        is used whole.
        """
        key = header_line[1:].strip()
        stem, dot, _ = key.rpartition('.')
        # rpartition reports a missing separator with an empty `dot`, which
        # avoids the -1 sentinel of rfind being mistaken for a valid index.
        return stem if dot else key

    def get_orig_rec(self):
        """Yield the lines of the original file one at a time."""
        with self.original_file.open() as forig:
            yield from forig

    def get_match(self, match_this, fo):
        """Write the first header-file record whose header contains *match_this*.

        Args:
            match_this: Key to search for inside the '>' header lines.
            fo: Open, writable text file that receives the matching record.

        Returns:
            True if a record was found and written to *fo*, False otherwise.
            The caller uses this to decide whether to drop the original
            record.
        """
        if not match_this:
            # An empty key is a substring of every header; treat as no match.
            return False

        found = False
        for line in self.new_data:
            if line.startswith('>'):
                if found:
                    # Reached the header after the matched record: done.
                    break
                found = match_this in line
            if found:
                # The file's last line may lack a newline; add one so the
                # next record written does not end up on the same line.
                fo.write(line if line.endswith('\n') else line + '\n')
        return found

    def make_new_file(self):
        """Write the output file, swapping in replacement records where found."""
        with self.out_file.open('w') as fo:
            skip = False
            for line in self.get_orig_rec():
                if line.startswith('>'):
                    # A new record starts: decide afresh whether it is
                    # replaced (and therefore skipped) or copied through.
                    skip = self.get_match(self._match_key(line), fo)
                if not skip:
                    fo.write(line)


def debug_main():
    """Run the swap on fixed file names, for use without the command line."""
    SwapHeaders(origfile='File1.txt', headerfile='File2.txt',
                outfile='Fileout.txt')


def main(argv=None):
    """Parse command-line options and run the swap.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--ifile", dest='original_filename',
                        help="Filename where headers are to be replaced",
                        action="store", required=True)
    parser.add_argument("-b", "--bfile", dest='replace_original_filename',
                        help="Filename containing the replacement records",
                        action="store", required=True)
    parser.add_argument("-o", "--ofile", dest='out_filename',
                        help="Output filename",
                        action="store", required=True)
    parser.add_argument("-d", "--datadir", dest='data_dir',
                        help="Directory holding the files (default: ./data)",
                        action="store", default=None)
    args = parser.parse_args(argv)

    SwapHeaders(origfile=args.original_filename,
                headerfile=args.replace_original_filename,
                outfile=args.out_filename,
                data_dir=args.data_dir)


if __name__ == '__main__':
    main()
    # debug_main()
# partial results:
Output:
>OFAS009268-RA-EXON07 |design:coreoidea-v1,designer:forthman,probes-locus:OFAS009268-RA-EXON07,probes-probe:,probes-source:Clavigralla_tomentosicollis_gi_512427643_gb_GAJX01006991.1
TTCTACACAAACTGCTTTGCACTGAGCACCATTAAAATCATCTGTTGACCTTGCAAGTTCTTCAAAATTTACATCAACGCTAATATTCATTTTCCGAGAATGTATTTGCATAATTCGAGCACGGGCATCTTCATTTGGATGAGGAAATTCAATTTTTCTGTCTAGCCTGCCTGATCGGAGAAGGGCTGGATCTAATATATCAACTCTGTTAGTTGCTGCAATG
>Clavigralla_tomentosicollis_gi_512427643_gb_GAJX01006991.1_0_rc
GCTCGAATTATGCAAATACATTCTCGGAAAATGAATATTAGCGTTGATGTAAATTTTGAAGAACTTGCAAGGTCAACAGATGATTTTAATGGTGCTCAGTGCAAAGCAGTTTGTGTAGAA
>OFAS009268-RA-EXON07 |design:coreoidea-v1,designer:forthman,probes-locus:OFAS009268-RA-EXON07,probes-probe:,probes-source:Clavigralla_tomentosicollis_gi_512427643_gb_GAJX01006991.1
TTCTACACAAACTGCTTTGCACTGAGCACCATTAAAATCATCTGTTGACCTTGCAAGTTCTTCAAAATTTACATCAACGCTAATATTCATTTTCCGAGAATGTATTTGCATAATTCGAGCACGGGCATCTTCATTTGGATGAGGAAATTCAATTTTTCTGTCTAGCCTGCCTGATCGGAGAAGGGCTGGATCTAATATATCAACTCTGTTAGTTGCTGCAATG
>Clavigralla_tomentosicollis_gi_512427643_gb_GAJX01006991.1_35_rc
AAATTGAATTTCCTCATCCAAATGAAGATGCCCGTGCTCGAATTATGCAAATACATTCTCGGAAAATGAATATTAGCGTTGATGTAAATTTTGAAGAACTTGCAAGGTCAACAGATGATT
>Anasa_tristis_comp3229_c0_seq1_136_rc
TCAGCCAATCATAGTGGAACCGATTTCCAGTGGAGACGAACTCCGAACTGATATTCATGGAATGGAAACACAAATAAACACTTTAGGTTCTAATAACATTGTATGTGTTCTTTCAACAAC
>uce-3225_p7 |design:hemiptera-v1,designer:faircloth,probes-locus:uce-3225,probes-probe:7,probes-source:halhal1,probes-global-chromo:Scaffold629,probes-global-start:410155,probes-global-end:410275,probes-local-start:0,probes-local-end:120
AAATCCATCAAGAAATACCAACAACAACTTAAGGATGTCCAGACCGCACTCGAGGAAGAACAAAGAGCTAGGGATGATGCCCGAGAACAACTTGGTATTGCCGAAAGGCGAGCCAACGCT