合并cds
In [24]:
import re
import os

# Merge CDS segments: consecutive FASTA records in 'CDS1.fa' whose headers
# share the same ID (everything up to and including the last literal ".cds")
# are concatenated into one sequence, and the result is written to 'cds.fa'
# as one header line followed by one sequence line per ID.

# Show the working directory so it is clear where the relative paths resolve.
print(os.getcwd())

# Compiled once, outside the loop. The dot before "cds" is escaped so that only
# a literal ".cds" ends the ID, e.g. ">Pt1g020530.1.cds.1" -> ">Pt1g020530.1.cds".
pattern = re.compile(r'>.*\.cds')

cid = ''   # ID of the record currently being accumulated ('' means none yet)
seq = ''   # sequence collected so far for `cid`
cds = {}   # ID -> merged sequence, in order of first appearance
with open('CDS1.fa') as f:
    for line in f:
        if line.startswith('>'):
            m = pattern.match(line)
            # A header without ".cds" is kept whole as its own ID instead of
            # crashing on `None.group()`.
            tname = m.group() if m else line.strip()
            if tname != cid:
                # Store the finished record. The `if cid` guard avoids the
                # empty key/value pair that used to be created at the very
                # first header (sequence text before any header is ignored).
                if cid:
                    cds[cid] = seq
                cid = tname
                seq = ''
        else:
            seq += line.strip()

# Flush the final record: no further header follows it, so the loop above
# never stores it on its own.
if cid:
    cds[cid] = seq

# The context manager guarantees the output file is closed even if a write fails.
with open('cds.fa', 'w') as outfile:
    for key, value in cds.items():
        outfile.write(key + '\n' + value + '\n')
    
    
/Users/zhengyangqi/我的文件/ZK-wang



>Pt1g020530.1.cds
ATGGATGCTCCTTTGGCAGCTTGGCCATGGGATAACCTAGGCATGTTCAAGTATGTGCTGTATGGACCACTCGTCGGAAAAGCTTTGTACTCATGGGTTTATGAAGATAAACGAATTGAATATTGGTGCCTCCATATTCTGATCATCGCCGTGCTTAGAGGACTAATTCATATCTTTTGGAGCTCTTTCAGTAACATGCTTTTCCTTAATCGTACTCGCCAGATTAATCAACGGGGAGTCGATTTCAAGCAGATTGATAATGAATGGAACTGGGATAATTTCATTCTACTTCAAGCTGCAATTGCATCCATGGGCTATTACATCTTTCCATGCTCTGAAAGCCTTCCTCGATGGAACACAAAAGGATTTATTGCACTACTGATACTTCATGTGGCTGTTTCGGAGCCTTTATATTACGTTTTACACAGACATTTTCACAGAAATAAATACCTTTTCACCCATTACCATTCACTCCACCATTCATCTCCAGTACCACAAATTCCAACAGCTGGGCATGCAACATTATTGGAGCACATTGTATTAAGTTTCATCGTTGCAATTCCAATTCTCGGATCTTCTATCATCGGATATGGATCAATAAGCTTGATTTATGGCTATATTTTGATGTTTGATTTTCTAAGATGCCTGGGGCATTGCAATGTTGAAATTATTCCCTATCGGTGGTTCGAAACTTTCCCATTTCTTCGATATCTTCTTTATACACCCACGTACCACAGCCTGCACCACACTGAGAAGGACTCCAATTTCTGTCTCTTTATGCCTCTCTTTGATGCCCTGGGAAATACACTTAATAGCAAATCCTGGGAAGATCATAAGAAAATTACTTCAGCTTCTGGGGAAAATGTGAGGGTCCCGGATTTTGTTTTCCTAGCGCATGTGGTCGATGTAACAGCATCAATGCATCCACCGTTTATTTTGAGATCAGTAGCTTCATTGCCATTCTCACCAAAGCTCTTTTTGCTGCCTTTTTGGCCCATTGCATTTTCAGCAATCTTCGCTTTGTGGGCATGGTCTAAGACTTTTCTAATCAATTTCTACTGGCTTAGAGGCAGGTTGCACCAGACTTGGGCTGTACCTAGATATGGCTTTCAGTACTTCTTGCCATTTGCTCAAACGGGAATCAATAAGCAAATAGAGGATGCCATCCTAAGGGCTGATAGACTTGGGGTTAAGGTCCTTAGCCTTGCTGCATTGAATAAGAACGAAACACTAAATGGCGGTGGCACTCTTTTTGTTGACAAGCACCCCAACCTTAAAGTTAGAGTTGTGCATGGAAATACATTTACGGCTGCAGTTATTTTGAATGAGCTTCCAAAGGATGTTAAAGAAGTATTTTTAACAGGAGCTACTTCGAAGCTTGGAAGAGCGATTGCTCTTTATCTCTGCCGAAAAAGAGTTCGAGTACTGATGCTGACTCTATCAACAGAAAGATTCCAGAAAATTCAGAAAGAAGCACCTATAGACTGTCAAAACTACCTTGTTGAAGTGACAAAATACCAAGCAGCTCAACATTGCAAGACATGGATTGTTGGCAAATGGATCACACCAAGGGAGCAAAATTGGGCGCCGCCAGGAACGCATTTTCATCAGTTTGTTGTGCCACCAATATTGCATTTTAGAAGAGATTGCACTTACGGAGACCTTGCCGCCATGAGATTGCCTGACGATGTTGAAGGACTTGGAACTTGTGAGTACACCATGGACCGTGGAGTAGTTCATGCATGCCATGCGGGAGGTGTGGTTCATCTTTTAGAAGGATGGACTCACCATGAAGTTGGGGCAATTGATGTTGACAAGATCGATTTAGTGTGGGAAGCTGCACTCAAGCATGGCTTCAAGCCAGTATCAAGCCTCAGGAATCGTCAGATTTCATCATAA

>Pt3g041200.1.cds
ATGGCTTCTAAACCAGGAATCCTCACCGAATGGCCATGGAAACCTCTTGGAAGCTATAAGCATGTGCTCCTGGCTCCATGGGCGATGCATAGCATATACTGTTTTATAGGGAGTAGAAAGAGTGAGCGAAACTATGCTTACTTCCTGATATTCCCTTTTCTGCTGCTGAGGATGCTTCATGACCAGATTTGGATTTCTCTTTCACGTTACCGAACAGCCAAAAGAAACAACAGGATCGTTGACAAGGCCATCGAATTCGACCAAGTTGACAGAGAAAGAGATTGGGATGACCAGATCGTGTTCAATGGACTGATATTCTATATAGTCCGCATGCTAATTCCTCCAAGTTATTCAAACCTGCCTCTCTGGAGAAGCGATGGTGTGATTCTTACGATTCTGATGCATATGGGTCCAGTAGAGTTTCTCTATTACTGGTTCCACAGAGCACTGCACCACCATTACCTCTACTCTCGCTACCATTCTCATCACCATTCTTCAATTGTCACAGAGCCCATTACTTCTGTGATTCATCCATTCGCCGAACACATTGTGTATTTCTTGCTCTTCGCAATACCACTGGGCACGACAGTGGTCCTCAAAAATGCTTCCATAGCATCTTTTGTTGGTTACATCATATACGTCGACTTCATGAACAACATGGGCCACTGTAACTTCGAGTTTGTCCCTATGTGGCTCTTCACCGTCTTTCCCCCTCTCAAGTTTCTTATGTATACGCCCTCGTATCACTCGCTGCACCACACTCAATTTCGGACCAACTACTCGCTATTTATGCCAATTTATGACTACATATACGGTACAATAGACAGAAGTTCAGATTCAGTGTACGAAAAATCACTAAAAAGATCAGGTGAAGAAGAAGAAGAATCAGCTGACGATGTGGACGTGGTACATCTAACGCATCTAACGACGCCGGAATCAATTTATCATCTGCGGATAGGATTTGCCTCCTTGGCATCAAAGCCCCATCGCTATACCTATACATTATCACAGTGGTATCTACAGCTGTTGTGGCCTTTCACAGCTTCTTGTTCTGTCCTTGTGAGTTGGATCTATGGCCGGACTTTTGTTTCAGAGAGCAACACTTTGGACAAACTCAAATTGCAAACCTGGGTGGTACCGAGGTACATTGTGCAATATAACTTGCCATGGAGAAGAGAAGCTATTAATAGCTTGATAGAAGAAGCCATATTAGAAGCAGATGCGAAAGGGGTAAAAGTTATAAGTCTAGGGCTTCTGAATCAGGGAGAGGAGCTTAACAGAAACGGAGAGATATACCTGGAAAGACACCCTAATAAGCTAAAAATCAAAGTGGTGGACGGAAGTAGCTTGGCAGCGGCGGTTGTGGTGAACAGCTTACCAAAAGCCACAGCCCACGTGCTTCTTAGAGGCACTGTTACCGCCAATAAGGTCGCTAACGCAGTTGCCTCATCTCTATGCCAAATGGGCATCAAGGTAGCCACGTTATGCAAGGACGATTATGAGAAGCTTAAGCTCAGGATCCCTGTGGAGGCTCAACATAATTTGGTCCTGTCAACAAGTTACGCTCACAGCACGAAGATTTGGTTAGTGGGAGGCAATTTGACAGGAAAGGAACAAGGAAGGGCACCAAAAGGCACAATATTCATTCCGTATACACAGATACCACCAAGGAAATTGCGGAAAGATTGCTTCTACCATTCGACTCCAGCAATGATAATTCCTCCCTCTTTAAATAACATGCATTCCTGTGAGAACTGGCTGGGAAGGAGGGTGATGAGTGCTTGGCGTATAGCTGGAATAATACATGCGTTGGAGGGATGGGATTTGAACGAGTGTGGGCAAACTATGTGCGACATCCACCAAGTTTGGCATGCCTCTCTCCGCCATGGATTCCGCCCTCTTTTCCATGTTGCTTGA

>Pt3g041200.2.cds
ATGGCTTCTAAACCAGGAATCCTCACCGAATGGCCATGGAAACCTCTTGGAAGCTATAAGCATGTGCTCCTGGCTCCATGGGCGATGCATAGCATATACTGTTTTATAGGGAGTAGAAAGAGTGAGCGAAACTATGCTTACTTCCTGATATTCCCTTTTCTGCTGCTGAGGATGCTTCATGACCAGATTTGGATTTCTCTTTCACGTTACCGAACAGCCAAAAGAAACAACAGGATCGTTGACAAGGCCATCGAATTCGACCAAGTTGACAGAGAAAGAGATTGGGATGACCAGATCGTGTTCAATGGACTGATATTCTATATAGTCCGCATGCTAATTCCTCCAAGTTATTCAAACCTGCCTCTCTGGAGAAGCGATGGTGTGATTCTTACGATTCTGATGCATATGGGTCCAGTAGAGTTTCTCTATTACTGGTTCCACAGAGCACTGCACCACCATTACCTCTACTCTCGCTACCATTCTCATCACCATTCTTCAATTGTCACAGAGCCCATTACTTCTGTGATTCATCCATTCGCCGAACACATTGTGTATTTCTTGCTCTTCGCAATACCACTGGGCACGACAGTGGTCCTCAAAAATGCTTCCATAGCATCTTTTGTTGGTTACATCATATACGTCGACTTCATGAACAACATGGGCCACTGTAACTTCGAGTTTGTCCCTATGTGGCTCTTCACCGTCTTTCCCCCTCTCAAGTTTCTTATGTATACGCCCTCGTATCACTCGCTGCACCACACTCAATTTCGGACCAACTACTCGCTATTTATGCCAATTTATGACTACATATACGGTACAATAGACAGAAGTTCAGATTCAGTGTACGAAAAATCACTAAAAAGATCAGGTGAAGAAGAAGAAGAATCAGCTGACGATGTGGACGTGGTACATCTAACGCATCTAACGACGCCGGAATCAATTTATCATCTGCGGATAGGATTTGCCTCCTTGGCATCAAAGCCCCATCGCTATACCTATACATTATCACAGTGGTATCTACAGCTGTTGTGGCCTTTCACAGCTTCTTGTTCTGTCCTTGTGAGTTGGATCTATGGCCGGACTTTTGTTTCAGAGAGCAACACTTTGGACAAACTCAAATTGCAAACCTGGGTGGTACCGAGGTACATTGTGCAATATAACTTGCCATGGAGAAGAGAAGCTATTAATAGCTTGATAGAAGAAGCCATATTAGAAGCAGATGCGAAAGGGGTAAAAGTTATAAGTCTAGGGCTTCTGAATCAGGGAGAGGAGCTTAACAGAAACGGAGAGATATACCTGGAAAGACACCCTAATAAGCTAAAAATCAAAGTGGTGGACGGAAGTAGCTTGGCAGCGGCGGTTGTGGTGAACAGCTTACCAAAAGCCACAGCCCACGTGCTTCTTAGAGGCACTGTTACCGCCAATAAGGTCGCTAACGCAGTTGCCTCATCTCTATGCCAAATGGGCATCAAGGTAGCCACGTTATGCAAGGACGATTATGAGAAGCTTAAGCTCAGGATCCCTGTGGAGGCTCAACATAATTTGGTCCTGTCAACAAGTTACGCTCACAGCACGATTTGGTTAGTGGGAGGCAATTTGACAGGAAAGGAACAAGGAAGGGCACCAAAAGGCACAATATTCATTCCGTATACACAGATACCACCAAGGAAATTGCGGAAAGATTGCTTCTACCATTCGACTCCAGCAATGATAATTCCTCCCTCTTTAAATAACATGCATTCCTGTGAGAACTGGCTGGGAAGGAGGGTGATGAGTGCTTGGCGTATAGCTGGAATAATACATGCGTTGGAGGGATGGGATTTGAACGAGTGTGGGCAAACTATGTGCGACATCCACCAAGTTTGGCATGCCTCTCTCCGCCATGGATTCCGCCCTCTTTTCCATGTTGCTTGA

>Pt3g041210.1.cds
ATGGCTTCGAAACCTGGATTTCTCACTGATTGGCCATGGACGCCTCTTGGAAACTTCAAGTACGTAGTATTGGCTCCTTGGATAATTCACAGCACGTACTCATTCATCGTAAAGGATGAGAAGGAGAGGGAGCTAGCCTACTTTATGATATTCCCATTGATGTTATGGAGAATGCTTCACAACCAGATATGGATCAGCTTTTCCCGTTACCGAACAGCCAAAGGCAGTAACAGAATCGTCGACAAGGCTATTGAATTCGAGCAAGTTGATAGAGAAAGAAATTGGGATGACCAAATAATATTCAACGGGATCCTGTTTTACGTATTCGTTAAAATAATTCCAGGCGCATCTCAAATGCCCATTTGGAGATTCGACGGTTTGATTCTCATAGCACTGCTGCATGCTGGTCCGGTGGAGTTCCTCTACTACTGGCTTCACAGAGCACTCCATCATCATTACCTTTACTCTCGCTACCATTCCCACCACCATTCCTCCATCGTCACTGAACCTATCACTTCTGTGATTCATCCATTTGCAGAGCACATAGCGTACTTCGCACTATTTGCAATACCATTGATTACACCATTGCTGAGTGGGATGGGCTCAATAGCATCCATATTCGGTTACCTCACTTACATAGATTTGATGAACAACATGGGTCACTGCAATTTCGAACTCATGCCCAGCTGCCTTCTCACCAACTTTCCTCCTCTCAAGTACCTCGTGTACACCGCGTCGTTCCACTCACTGCATCACACGCAATTCCGGACCAATTATTCGTTATTTATGCCCGTATACGATTACATATATGGCACCGTGGACAAAACTTCGGATGCATTATATGAAACTAGTCTAAAGAGACAGGAAGACTCGCCCGATGTTGTGCATCTCACGCACCTAACAACACCTGAATCAATCTACCATATGCGACTTGGTTTTGCCTCCATGGCGTCTAAGCCCCATGATCACCATACATCATCAAAGTGGTATATGTGGTTAATGTGGCCTGTCACAGTATGGTCCATGATGTTCACTTGGATTTATGGTCGTACCTTTGTGGTTGAGAGGAATCACCTTAATAAATTCAAACTACAGACTTGGGCAATTCCCAGATACAACTTTCAATATTTGTTGCTGCGGCAAAATGAATCGATCAATAGGTTGATTGAAGAAGCCATACTAGAAGCTGAGGAAAAAGGAGCTAAAGTGATAAGTCTAGGTCTCATGAATCAAGGAGAGGAGCTTAACTGTTATGGTGGGGTATTCGTGCACAAGCATCCTCAGCTTAAAATAAAGGTAGTGGACGGGAGTAGCTTAGCAGTAGCAGTAGTGATAAACAGCATACCAAAGGGAACAACACAAGTGGTCCTTAGAGGCGCTCTCACAAAGGTCGCTTATGCCATTGCCTTTGCCTTATGCCAAAAGGGCATCCAGGTTGTAACATTACGTGAGGATGAGCACGAGAAGCTTAGAAAATCGTTTGGGGCCAAATCTGAATGTAATAATTTGCTTCTCTCGAGAAGCTACTCCCAAAAGATATGGTTGGTGGGAAAAGGGCTGACTGAAGAAGAACAATCCAAGGCTAAAAAGGGAACAACCTTCATTCCTTTCTCACAGTTTCCACCAAACGATAAGAAAATACGTAAAGACTGTATGTACCATCTCACACCAGCAATGGCCGTTCCTGCTGATTTTGAGAATGTGGACTCGTGCGAGAATTGGTTGCCAAGAAGAGTGATGAGTGCATGGCGAATTGGGGGAATAGTGCATGCCTTGGAAGGATGGAACGAACACGAGTGTGGTTACGCCATCTCCAACATTCACAATGTTTGGGAAGCTGCTCTTCGACATGGCTTTCACCCTCTGACCGCTACCATTCTTACTCAATCCTATCCTATCTAG

>PtUn030220.1.cds
ATGGCTTCGAAACCAGGAATTCTCACTGATTGGCCATGGACACCCCTTGGAAACTTTAAGTACGTAGTATTGGCTCCGTGGATCATCCACAGCACGTATTCATTCATGGTTAAGGATGAAAAGGAGAGGGACCTACTCAACTTTCTCATATTCCCGTTTCTATTATGGAGAATGCTTCACAACCAGATATGGATCAGCCTTTCCCGTTACCGAACAGCCAAAGGCCGTAACAGGATCGTCGACAAGCCTATTGAATTCGAGCAAGTCGACAGAGAGAGAAATTGGGACGACCAAATAATATTGAGTGGAATATTGTTTTACGTTGTTTTCGGCAAAATGCTTCCAGGCGGAACTCAGTTGCCCATTTGGAGATTAGATGGTGTAATTCTCATGGCACTTCTGCATGCTGGTCCGGTGGAGTTCGTCTACTACTGGCTTCACAGAGCACTCCATCATCATTACCTTTACTCTCGCTATCACTCCCATCACCATTCCTCTATCGTCACTGAACCTATCACTTCTGTTACTCATCCATTTGCTGAGCACATAGCATATTTCGTTCTATTTGCAACACCATTGATTACAACAGTGCTGACTGGGGCCGGGTCAATAATACTTGCCTTCGGCTACATCACTTACATAGACTTAATGAATAACATGGGTCACTGCAATTTCGAGCTCATACCTAAATGGCTTTTCACCATTTTTCCTCCTCTCAAGTACCTCATGTACACCCCTTCGTACCACTCACTGCATCATACGCAGTTCCGGGCGAATTACTCGTTATTCATGCCTTTATACGATTACTTATTCAGTACTGTCGACAAAACTTCGGATACATTATATGAAACCAGTCTCAAGAAACAGGATGATTCACTGGATGTTGTTTACCTCACACACCTGACAACGCCTGAATCAATCTATCATATGCGGCTTGGTTTGGCCTCATTGGCTTCTAAGCCCCATCACCATGCATCATCAGAGTGGTATAAGTGGTTGCTGTGGCCTGTCACGTTATTGTCAATGATGATCACTTGGATTTACGGCCGTACCTTTGTGGTTGAAAGGAATCGCCTTAATAAATTAAAACTACAGACTTGGGCGATATCCAAATACAATATGCAATACTTCTCGCAGCGGCAAAATGAATCGATCAATCGCTTGATTGAAGAAGCCATACTAGAAGCTGAGGAGAAAGGAGCCAGGGTGATAAGTCTAGGTCTCTTGAATCAAGGAGAGGAGCTTAACCGGTACGGTGGGCTCTTCGTGCACAAGAATCCTCAGCTTAAAATAAAGGTCGTGGATGGGAGTAGCTTAGCGGTGGCAGTACTAATAAACAGCATACCCGACGGAACAACCCAAGTGGTCATTAGAGGCATTCTCACTAAGGTTGCTTATGCCACTGCCTTTGCCTTATGCCAAAAGGGAATTCAGGTAGTAACTTTACGTGAGGATGAGCATGAGAAGCTTATTAGATCATTTGGGGGCAAATCTGAAAGTAAGAACTTGCTTGTTTCAAGGAGCTACTGCCAAAAGATATGGTTGGTGGGAAATGGACTGACTGAAGAAGAACAATCCAAGGCAGAAAGAGGAACAATTTTCGTTCCTTTCTCACAGTTCCCACCGGCGAAGAAAAGACGTAAAGACTGTACCTACCACCTCACACCAGCGATGGCCACTCCTGCTACTCTTGAGAATGTCGACGCCTGTGAGAATTGGTTACCAAGAAGGGTGATGAGTGCGTGGAGAATTGGGGGGATAGTGCATGCCTTGGAAGGATGGAATGAACACGAGTGTGGTTACACCATTTCCAACGTTGACACCGTCTGGGACGCTGCTCTTCGACATGGCTTTCTGCCTCTCACCATTCCAACTCAATCTTAA

>Pt6g016380.1.cds
ATGGCTTCGAAACCAGGAATTCTCACTGATTGGCCATGGACACCCCTTGGAAACTTTAAGTACATAGTATTGGCTCCTTGGATCATCCACAGCACGTATTCATTCATGGTTAAGGATGAAAAGGAGAGGGACCTACTCAACTTTCTCATATTCCCGTTTCTATTATGGAGAATGCTTCACAACCAGATATGGATCAGCCTTTCCCGTTACCGAACAGCCAAAGGCCGTAACAGGATCGTCGACAAGCATATTGAATTCGAGCAAGTTGACAGAGAGAGAAATTGGGATGACCAAATAATATTGAGTGGGATATTGTTTTACATTATTTTCCGCAAAATGCTTCCAGGCAGAACTCAGTTGCCCATTTGGAGATTAGACGGTGTGATTCTCATGGCACTTCTGCATGCTGGTCCAGTGGAGTTCGTCTACTACTGGCTTCACAGAGCACTCCATCATCATTACCTTTACTCTCGTTATCACTCCCGTCACCATTCCTCAATCGTCACTGAACCTATCACTTCTGTTACTCATCCATTTGCTGAGCACATAGCATATTTCGTTCTATTTGCAACACCATTGATTACAACAGTGCTGACTGGGGCCGGGTCAATAGTACTTGCCTTCGGCTACATCACTTACATAGACTTAATGAATAACATGGGTCACTGCAATTTCGAGCTCATACCTAAATGGCTTTTCACCATTTTTCCTCCTCTCAAGTACCTCATGTACACCCCTTCGTTCCACTCACTGCATCATACGCAGTTCCGGGCGAATTACTCGTTATTCATGCCTTTATACGATTACTTATTCAGTACTGTCGACAAAACTTCGGATACATTATATGAAACCAGTCTCAAGAAACAGGAAGATTCACCGGATGTTGTTTACCTCACGCACCTGACAACACCTGAATCAATCTATCATATGCGGCTTGGTTTGGCCTCACTGGCTTCTAAGCCCCATCACCATGCATCATCAGAGTGGTATAAGTGGTTGCCGTGGCCTGTCACGTTATTGTCGATGATGATCACTTGGATTTATGGCCGTACCTTTGTGGTTGAAAGGAATCGCCTTAATAAATTAAAACTACAGACTTGGGCGATATCCAAATACAATATGCAATACTTCTCGCAGCGGCAAAATGAATCGATCAATCGCTTGATTGAAGAAGCCATACTAGAAGCTGAGGAGAAAGGAGCTAGGGTGATAACTCTAGGTCTCTTGAATCAAGGAGAGGAGCTTAACCGGTACGGTGGGCTCTTCGTGCACAAGAATCCTGAGCTTAAAATAAAGGTAGTGAATGGGAGTAGCTTAGCGGTGGCAGTACTGACAAACAGCATACCCGACGGAACAACCCAAGTGAGAGTTTCCATCATCAGCATATGTAACTTCTCCATCTTCAACATCCTTTTCGACACTAGGATTCTCATGATCTTCCGCATCCTCATCATCGATTTCCTCTTCAACAAGGGCAACCACCCTACGATTAGGAAACTCAGAATTGCTTATGTCACTGCCTTTGCCTTATGCCAAAAGGGAATTCAGGTAGTAACTTTACGTGAGGATGAGCATGAGAAGCTTATTAGATCATTTGGAGGCAAATCTGAAAGTAAGAACTTGCTTGTTTCAAGGAGCTACTGCCAAAAGATATGGTTGGTGGGAAATGGACTGACTGAAGAAGAACAATCTAAGGCAGAAAGAGGAACAACGTTCGTTCCTTTCTCACAGTTCCCACCGGCGAAGAAAAGACGTAAAGACTGTACCTACCACCTTACACCAGCGATGGGCACTCCTGCTACTCTTGGGAATGTCGACTCATGTGAGAATTGGTTACCAAGAAGGGTGATGAGTGCGTGGAGAATTGGGGGGATAGTGCATGCCTTGGGAGGATGGAATGAACACGACTGTGGTTACACCATTTCCAACGTTGACACCATCTGGGACGCTGCTTTTCATCATGGCTTTCTACCTCTCACCATTCTAACTCAATCTTAA

In [19]:
import re
# Sanity check of the header-ID pattern: keep everything from '>' up to and
# including the last literal ".cds", dropping any trailing segment suffix.
s = '>Pt1g020530.1.cds.1'
# The dot before "cds" is escaped so it only matches a literal '.', not any
# character (the unescaped form would also accept e.g. ">geneXcds").
pattern = re.compile(r'>.*\.cds')
m = pattern.match(s)
m.group()
Out[19]:
'>Pt1g020530.1.cds'