In [24]:
import re
import os
# Merge the CDS segments of 'CDS1.fa' by transcript and write them to 'cds.fa'.
#
# Every header is truncated to its '>....cds' prefix, so consecutive entries
# such as '>X.1.cds.1' and '>X.1.cds.2' share one key and their sequence lines
# are concatenated into a single record.

# Show the working directory, since both file names below are relative to it.
print(os.getcwd())

# Compiled once, outside the loop. The dot before "cds" is escaped so only a
# literal ".cds" matches, and the lazy quantifier stops at the first such
# suffix: '>Pt1g020530.1.cds.1' -> '>Pt1g020530.1.cds'.
pattern = re.compile(r'>.*?\.cds')

cid = ''   # truncated header of the record currently being accumulated
seq = ''   # sequence collected so far for `cid`
cds = {}   # truncated header -> concatenated sequence, in file order

with open('CDS1.fa') as f:
    for line in f:
        if line.startswith('>'):
            m = pattern.match(line)
            if m is None:
                # Fail with a readable message instead of an AttributeError
                # on `None.group()`.
                raise ValueError(
                    "FASTA header without a '.cds' identifier: " + line.strip()
                )
            tname = m.group()
            if tname != cid:
                # A new record starts: store the finished one. The `if cid`
                # guard keeps the initial empty placeholder out of the dict,
                # which previously produced an empty key/value pair.
                if cid:
                    cds[cid] = seq
                cid = tname
                seq = ''
        else:
            seq += line.strip()

# Store the record that was still being accumulated when the file ended;
# without this the last sequence of the input would be lost.
if cid:
    cds[cid] = seq

# The context manager closes the output file even if a write fails.
with open('cds.fa', 'w') as outfile:
    for key, value in cds.items():
        outfile.write(key + '\n' + value + '\n')
/Users/zhengyangqi/我的文件/ZK-wang >Pt1g020530.1.cds ATGGATGCTCCTTTGGCAGCTTGGCCATGGGATAACCTAGGCATGTTCAAGTATGTGCTGTATGGACCACTCGTCGGAAAAGCTTTGTACTCATGGGTTTATGAAGATAAACGAATTGAATATTGGTGCCTCCATATTCTGATCATCGCCGTGCTTAGAGGACTAATTCATATCTTTTGGAGCTCTTTCAGTAACATGCTTTTCCTTAATCGTACTCGCCAGATTAATCAACGGGGAGTCGATTTCAAGCAGATTGATAATGAATGGAACTGGGATAATTTCATTCTACTTCAAGCTGCAATTGCATCCATGGGCTATTACATCTTTCCATGCTCTGAAAGCCTTCCTCGATGGAACACAAAAGGATTTATTGCACTACTGATACTTCATGTGGCTGTTTCGGAGCCTTTATATTACGTTTTACACAGACATTTTCACAGAAATAAATACCTTTTCACCCATTACCATTCACTCCACCATTCATCTCCAGTACCACAAATTCCAACAGCTGGGCATGCAACATTATTGGAGCACATTGTATTAAGTTTCATCGTTGCAATTCCAATTCTCGGATCTTCTATCATCGGATATGGATCAATAAGCTTGATTTATGGCTATATTTTGATGTTTGATTTTCTAAGATGCCTGGGGCATTGCAATGTTGAAATTATTCCCTATCGGTGGTTCGAAACTTTCCCATTTCTTCGATATCTTCTTTATACACCCACGTACCACAGCCTGCACCACACTGAGAAGGACTCCAATTTCTGTCTCTTTATGCCTCTCTTTGATGCCCTGGGAAATACACTTAATAGCAAATCCTGGGAAGATCATAAGAAAATTACTTCAGCTTCTGGGGAAAATGTGAGGGTCCCGGATTTTGTTTTCCTAGCGCATGTGGTCGATGTAACAGCATCAATGCATCCACCGTTTATTTTGAGATCAGTAGCTTCATTGCCATTCTCACCAAAGCTCTTTTTGCTGCCTTTTTGGCCCATTGCATTTTCAGCAATCTTCGCTTTGTGGGCATGGTCTAAGACTTTTCTAATCAATTTCTACTGGCTTAGAGGCAGGTTGCACCAGACTTGGGCTGTACCTAGATATGGCTTTCAGTACTTCTTGCCATTTGCTCAAACGGGAATCAATAAGCAAATAGAGGATGCCATCCTAAGGGCTGATAGACTTGGGGTTAAGGTCCTTAGCCTTGCTGCATTGAATAAGAACGAAACACTAAATGGCGGTGGCACTCTTTTTGTTGACAAGCACCCCAACCTTAAAGTTAGAGTTGTGCATGGAAATACATTTACGGCTGCAGTTATTTTGAATGAGCTTCCAAAGGATGTTAAAGAAGTATTTTTAACAGGAGCTACTTCGAAGCTTGGAAGAGCGATTGCTCTTTATCTCTGCCGAAAAAGAGTTCGAGTACTGATGCTGACTCTATCAACAGAAAGATTCCAGAAAATTCAGAAAGAAGCACCTATAGACTGTCAAAACTACCTTGTTGAAGTGACAAAATACCAAGCAGCTCAACATTGCAAGACATGGATTGTTGGCAAATGGATCACACCAAGGGAGCAAAATTGGGCGCCGCCAGGAACGCATTTTCATCAGTTTGTTGTGCCACCAATATTGCATTTTAGAAGAGATTGCACTTACGGAGACCTTGCCGCCATGAGATTGCCTGACGATGTTGAAGGACTTGGAACTTGTGAGTACACCATGGACCGTGGAGTAGTTCATGCATGCCATGCGGGAGGTGTGGTTCATCTTTTAGAAGGATGGACTCACCATGAAGTTGGGGCAATTGATGTTGACAAGATCGATTTAGTGTGGGAAGCTGCACTCAAGCATGGCTTCAAGCCAGTATCAAGCCTCAGGAATCGTCAGATTTCATCATAA >Pt3g041200.1.cds 
ATGGCTTCTAAACCAGGAATCCTCACCGAATGGCCATGGAAACCTCTTGGAAGCTATAAGCATGTGCTCCTGGCTCCATGGGCGATGCATAGCATATACTGTTTTATAGGGAGTAGAAAGAGTGAGCGAAACTATGCTTACTTCCTGATATTCCCTTTTCTGCTGCTGAGGATGCTTCATGACCAGATTTGGATTTCTCTTTCACGTTACCGAACAGCCAAAAGAAACAACAGGATCGTTGACAAGGCCATCGAATTCGACCAAGTTGACAGAGAAAGAGATTGGGATGACCAGATCGTGTTCAATGGACTGATATTCTATATAGTCCGCATGCTAATTCCTCCAAGTTATTCAAACCTGCCTCTCTGGAGAAGCGATGGTGTGATTCTTACGATTCTGATGCATATGGGTCCAGTAGAGTTTCTCTATTACTGGTTCCACAGAGCACTGCACCACCATTACCTCTACTCTCGCTACCATTCTCATCACCATTCTTCAATTGTCACAGAGCCCATTACTTCTGTGATTCATCCATTCGCCGAACACATTGTGTATTTCTTGCTCTTCGCAATACCACTGGGCACGACAGTGGTCCTCAAAAATGCTTCCATAGCATCTTTTGTTGGTTACATCATATACGTCGACTTCATGAACAACATGGGCCACTGTAACTTCGAGTTTGTCCCTATGTGGCTCTTCACCGTCTTTCCCCCTCTCAAGTTTCTTATGTATACGCCCTCGTATCACTCGCTGCACCACACTCAATTTCGGACCAACTACTCGCTATTTATGCCAATTTATGACTACATATACGGTACAATAGACAGAAGTTCAGATTCAGTGTACGAAAAATCACTAAAAAGATCAGGTGAAGAAGAAGAAGAATCAGCTGACGATGTGGACGTGGTACATCTAACGCATCTAACGACGCCGGAATCAATTTATCATCTGCGGATAGGATTTGCCTCCTTGGCATCAAAGCCCCATCGCTATACCTATACATTATCACAGTGGTATCTACAGCTGTTGTGGCCTTTCACAGCTTCTTGTTCTGTCCTTGTGAGTTGGATCTATGGCCGGACTTTTGTTTCAGAGAGCAACACTTTGGACAAACTCAAATTGCAAACCTGGGTGGTACCGAGGTACATTGTGCAATATAACTTGCCATGGAGAAGAGAAGCTATTAATAGCTTGATAGAAGAAGCCATATTAGAAGCAGATGCGAAAGGGGTAAAAGTTATAAGTCTAGGGCTTCTGAATCAGGGAGAGGAGCTTAACAGAAACGGAGAGATATACCTGGAAAGACACCCTAATAAGCTAAAAATCAAAGTGGTGGACGGAAGTAGCTTGGCAGCGGCGGTTGTGGTGAACAGCTTACCAAAAGCCACAGCCCACGTGCTTCTTAGAGGCACTGTTACCGCCAATAAGGTCGCTAACGCAGTTGCCTCATCTCTATGCCAAATGGGCATCAAGGTAGCCACGTTATGCAAGGACGATTATGAGAAGCTTAAGCTCAGGATCCCTGTGGAGGCTCAACATAATTTGGTCCTGTCAACAAGTTACGCTCACAGCACGAAGATTTGGTTAGTGGGAGGCAATTTGACAGGAAAGGAACAAGGAAGGGCACCAAAAGGCACAATATTCATTCCGTATACACAGATACCACCAAGGAAATTGCGGAAAGATTGCTTCTACCATTCGACTCCAGCAATGATAATTCCTCCCTCTTTAAATAACATGCATTCCTGTGAGAACTGGCTGGGAAGGAGGGTGATGAGTGCTTGGCGTATAGCTGGAATAATACATGCGTTGGAGGGATGGGATTTGAACGAGTGTGGGCAAACTATGTGCGACATCCACCAAGTTTGGCATGCCTCTCTCCGCCATGGATTCCGCCCTCTTTTCCATGTTGCTTGA >Pt3g041200.2.cds 
ATGGCTTCTAAACCAGGAATCCTCACCGAATGGCCATGGAAACCTCTTGGAAGCTATAAGCATGTGCTCCTGGCTCCATGGGCGATGCATAGCATATACTGTTTTATAGGGAGTAGAAAGAGTGAGCGAAACTATGCTTACTTCCTGATATTCCCTTTTCTGCTGCTGAGGATGCTTCATGACCAGATTTGGATTTCTCTTTCACGTTACCGAACAGCCAAAAGAAACAACAGGATCGTTGACAAGGCCATCGAATTCGACCAAGTTGACAGAGAAAGAGATTGGGATGACCAGATCGTGTTCAATGGACTGATATTCTATATAGTCCGCATGCTAATTCCTCCAAGTTATTCAAACCTGCCTCTCTGGAGAAGCGATGGTGTGATTCTTACGATTCTGATGCATATGGGTCCAGTAGAGTTTCTCTATTACTGGTTCCACAGAGCACTGCACCACCATTACCTCTACTCTCGCTACCATTCTCATCACCATTCTTCAATTGTCACAGAGCCCATTACTTCTGTGATTCATCCATTCGCCGAACACATTGTGTATTTCTTGCTCTTCGCAATACCACTGGGCACGACAGTGGTCCTCAAAAATGCTTCCATAGCATCTTTTGTTGGTTACATCATATACGTCGACTTCATGAACAACATGGGCCACTGTAACTTCGAGTTTGTCCCTATGTGGCTCTTCACCGTCTTTCCCCCTCTCAAGTTTCTTATGTATACGCCCTCGTATCACTCGCTGCACCACACTCAATTTCGGACCAACTACTCGCTATTTATGCCAATTTATGACTACATATACGGTACAATAGACAGAAGTTCAGATTCAGTGTACGAAAAATCACTAAAAAGATCAGGTGAAGAAGAAGAAGAATCAGCTGACGATGTGGACGTGGTACATCTAACGCATCTAACGACGCCGGAATCAATTTATCATCTGCGGATAGGATTTGCCTCCTTGGCATCAAAGCCCCATCGCTATACCTATACATTATCACAGTGGTATCTACAGCTGTTGTGGCCTTTCACAGCTTCTTGTTCTGTCCTTGTGAGTTGGATCTATGGCCGGACTTTTGTTTCAGAGAGCAACACTTTGGACAAACTCAAATTGCAAACCTGGGTGGTACCGAGGTACATTGTGCAATATAACTTGCCATGGAGAAGAGAAGCTATTAATAGCTTGATAGAAGAAGCCATATTAGAAGCAGATGCGAAAGGGGTAAAAGTTATAAGTCTAGGGCTTCTGAATCAGGGAGAGGAGCTTAACAGAAACGGAGAGATATACCTGGAAAGACACCCTAATAAGCTAAAAATCAAAGTGGTGGACGGAAGTAGCTTGGCAGCGGCGGTTGTGGTGAACAGCTTACCAAAAGCCACAGCCCACGTGCTTCTTAGAGGCACTGTTACCGCCAATAAGGTCGCTAACGCAGTTGCCTCATCTCTATGCCAAATGGGCATCAAGGTAGCCACGTTATGCAAGGACGATTATGAGAAGCTTAAGCTCAGGATCCCTGTGGAGGCTCAACATAATTTGGTCCTGTCAACAAGTTACGCTCACAGCACGATTTGGTTAGTGGGAGGCAATTTGACAGGAAAGGAACAAGGAAGGGCACCAAAAGGCACAATATTCATTCCGTATACACAGATACCACCAAGGAAATTGCGGAAAGATTGCTTCTACCATTCGACTCCAGCAATGATAATTCCTCCCTCTTTAAATAACATGCATTCCTGTGAGAACTGGCTGGGAAGGAGGGTGATGAGTGCTTGGCGTATAGCTGGAATAATACATGCGTTGGAGGGATGGGATTTGAACGAGTGTGGGCAAACTATGTGCGACATCCACCAAGTTTGGCATGCCTCTCTCCGCCATGGATTCCGCCCTCTTTTCCATGTTGCTTGA >Pt3g041210.1.cds 
ATGGCTTCGAAACCTGGATTTCTCACTGATTGGCCATGGACGCCTCTTGGAAACTTCAAGTACGTAGTATTGGCTCCTTGGATAATTCACAGCACGTACTCATTCATCGTAAAGGATGAGAAGGAGAGGGAGCTAGCCTACTTTATGATATTCCCATTGATGTTATGGAGAATGCTTCACAACCAGATATGGATCAGCTTTTCCCGTTACCGAACAGCCAAAGGCAGTAACAGAATCGTCGACAAGGCTATTGAATTCGAGCAAGTTGATAGAGAAAGAAATTGGGATGACCAAATAATATTCAACGGGATCCTGTTTTACGTATTCGTTAAAATAATTCCAGGCGCATCTCAAATGCCCATTTGGAGATTCGACGGTTTGATTCTCATAGCACTGCTGCATGCTGGTCCGGTGGAGTTCCTCTACTACTGGCTTCACAGAGCACTCCATCATCATTACCTTTACTCTCGCTACCATTCCCACCACCATTCCTCCATCGTCACTGAACCTATCACTTCTGTGATTCATCCATTTGCAGAGCACATAGCGTACTTCGCACTATTTGCAATACCATTGATTACACCATTGCTGAGTGGGATGGGCTCAATAGCATCCATATTCGGTTACCTCACTTACATAGATTTGATGAACAACATGGGTCACTGCAATTTCGAACTCATGCCCAGCTGCCTTCTCACCAACTTTCCTCCTCTCAAGTACCTCGTGTACACCGCGTCGTTCCACTCACTGCATCACACGCAATTCCGGACCAATTATTCGTTATTTATGCCCGTATACGATTACATATATGGCACCGTGGACAAAACTTCGGATGCATTATATGAAACTAGTCTAAAGAGACAGGAAGACTCGCCCGATGTTGTGCATCTCACGCACCTAACAACACCTGAATCAATCTACCATATGCGACTTGGTTTTGCCTCCATGGCGTCTAAGCCCCATGATCACCATACATCATCAAAGTGGTATATGTGGTTAATGTGGCCTGTCACAGTATGGTCCATGATGTTCACTTGGATTTATGGTCGTACCTTTGTGGTTGAGAGGAATCACCTTAATAAATTCAAACTACAGACTTGGGCAATTCCCAGATACAACTTTCAATATTTGTTGCTGCGGCAAAATGAATCGATCAATAGGTTGATTGAAGAAGCCATACTAGAAGCTGAGGAAAAAGGAGCTAAAGTGATAAGTCTAGGTCTCATGAATCAAGGAGAGGAGCTTAACTGTTATGGTGGGGTATTCGTGCACAAGCATCCTCAGCTTAAAATAAAGGTAGTGGACGGGAGTAGCTTAGCAGTAGCAGTAGTGATAAACAGCATACCAAAGGGAACAACACAAGTGGTCCTTAGAGGCGCTCTCACAAAGGTCGCTTATGCCATTGCCTTTGCCTTATGCCAAAAGGGCATCCAGGTTGTAACATTACGTGAGGATGAGCACGAGAAGCTTAGAAAATCGTTTGGGGCCAAATCTGAATGTAATAATTTGCTTCTCTCGAGAAGCTACTCCCAAAAGATATGGTTGGTGGGAAAAGGGCTGACTGAAGAAGAACAATCCAAGGCTAAAAAGGGAACAACCTTCATTCCTTTCTCACAGTTTCCACCAAACGATAAGAAAATACGTAAAGACTGTATGTACCATCTCACACCAGCAATGGCCGTTCCTGCTGATTTTGAGAATGTGGACTCGTGCGAGAATTGGTTGCCAAGAAGAGTGATGAGTGCATGGCGAATTGGGGGAATAGTGCATGCCTTGGAAGGATGGAACGAACACGAGTGTGGTTACGCCATCTCCAACATTCACAATGTTTGGGAAGCTGCTCTTCGACATGGCTTTCACCCTCTGACCGCTACCATTCTTACTCAATCCTATCCTATCTAG >PtUn030220.1.cds 
ATGGCTTCGAAACCAGGAATTCTCACTGATTGGCCATGGACACCCCTTGGAAACTTTAAGTACGTAGTATTGGCTCCGTGGATCATCCACAGCACGTATTCATTCATGGTTAAGGATGAAAAGGAGAGGGACCTACTCAACTTTCTCATATTCCCGTTTCTATTATGGAGAATGCTTCACAACCAGATATGGATCAGCCTTTCCCGTTACCGAACAGCCAAAGGCCGTAACAGGATCGTCGACAAGCCTATTGAATTCGAGCAAGTCGACAGAGAGAGAAATTGGGACGACCAAATAATATTGAGTGGAATATTGTTTTACGTTGTTTTCGGCAAAATGCTTCCAGGCGGAACTCAGTTGCCCATTTGGAGATTAGATGGTGTAATTCTCATGGCACTTCTGCATGCTGGTCCGGTGGAGTTCGTCTACTACTGGCTTCACAGAGCACTCCATCATCATTACCTTTACTCTCGCTATCACTCCCATCACCATTCCTCTATCGTCACTGAACCTATCACTTCTGTTACTCATCCATTTGCTGAGCACATAGCATATTTCGTTCTATTTGCAACACCATTGATTACAACAGTGCTGACTGGGGCCGGGTCAATAATACTTGCCTTCGGCTACATCACTTACATAGACTTAATGAATAACATGGGTCACTGCAATTTCGAGCTCATACCTAAATGGCTTTTCACCATTTTTCCTCCTCTCAAGTACCTCATGTACACCCCTTCGTACCACTCACTGCATCATACGCAGTTCCGGGCGAATTACTCGTTATTCATGCCTTTATACGATTACTTATTCAGTACTGTCGACAAAACTTCGGATACATTATATGAAACCAGTCTCAAGAAACAGGATGATTCACTGGATGTTGTTTACCTCACACACCTGACAACGCCTGAATCAATCTATCATATGCGGCTTGGTTTGGCCTCATTGGCTTCTAAGCCCCATCACCATGCATCATCAGAGTGGTATAAGTGGTTGCTGTGGCCTGTCACGTTATTGTCAATGATGATCACTTGGATTTACGGCCGTACCTTTGTGGTTGAAAGGAATCGCCTTAATAAATTAAAACTACAGACTTGGGCGATATCCAAATACAATATGCAATACTTCTCGCAGCGGCAAAATGAATCGATCAATCGCTTGATTGAAGAAGCCATACTAGAAGCTGAGGAGAAAGGAGCCAGGGTGATAAGTCTAGGTCTCTTGAATCAAGGAGAGGAGCTTAACCGGTACGGTGGGCTCTTCGTGCACAAGAATCCTCAGCTTAAAATAAAGGTCGTGGATGGGAGTAGCTTAGCGGTGGCAGTACTAATAAACAGCATACCCGACGGAACAACCCAAGTGGTCATTAGAGGCATTCTCACTAAGGTTGCTTATGCCACTGCCTTTGCCTTATGCCAAAAGGGAATTCAGGTAGTAACTTTACGTGAGGATGAGCATGAGAAGCTTATTAGATCATTTGGGGGCAAATCTGAAAGTAAGAACTTGCTTGTTTCAAGGAGCTACTGCCAAAAGATATGGTTGGTGGGAAATGGACTGACTGAAGAAGAACAATCCAAGGCAGAAAGAGGAACAATTTTCGTTCCTTTCTCACAGTTCCCACCGGCGAAGAAAAGACGTAAAGACTGTACCTACCACCTCACACCAGCGATGGCCACTCCTGCTACTCTTGAGAATGTCGACGCCTGTGAGAATTGGTTACCAAGAAGGGTGATGAGTGCGTGGAGAATTGGGGGGATAGTGCATGCCTTGGAAGGATGGAATGAACACGAGTGTGGTTACACCATTTCCAACGTTGACACCGTCTGGGACGCTGCTCTTCGACATGGCTTTCTGCCTCTCACCATTCCAACTCAATCTTAA >Pt6g016380.1.cds 
ATGGCTTCGAAACCAGGAATTCTCACTGATTGGCCATGGACACCCCTTGGAAACTTTAAGTACATAGTATTGGCTCCTTGGATCATCCACAGCACGTATTCATTCATGGTTAAGGATGAAAAGGAGAGGGACCTACTCAACTTTCTCATATTCCCGTTTCTATTATGGAGAATGCTTCACAACCAGATATGGATCAGCCTTTCCCGTTACCGAACAGCCAAAGGCCGTAACAGGATCGTCGACAAGCATATTGAATTCGAGCAAGTTGACAGAGAGAGAAATTGGGATGACCAAATAATATTGAGTGGGATATTGTTTTACATTATTTTCCGCAAAATGCTTCCAGGCAGAACTCAGTTGCCCATTTGGAGATTAGACGGTGTGATTCTCATGGCACTTCTGCATGCTGGTCCAGTGGAGTTCGTCTACTACTGGCTTCACAGAGCACTCCATCATCATTACCTTTACTCTCGTTATCACTCCCGTCACCATTCCTCAATCGTCACTGAACCTATCACTTCTGTTACTCATCCATTTGCTGAGCACATAGCATATTTCGTTCTATTTGCAACACCATTGATTACAACAGTGCTGACTGGGGCCGGGTCAATAGTACTTGCCTTCGGCTACATCACTTACATAGACTTAATGAATAACATGGGTCACTGCAATTTCGAGCTCATACCTAAATGGCTTTTCACCATTTTTCCTCCTCTCAAGTACCTCATGTACACCCCTTCGTTCCACTCACTGCATCATACGCAGTTCCGGGCGAATTACTCGTTATTCATGCCTTTATACGATTACTTATTCAGTACTGTCGACAAAACTTCGGATACATTATATGAAACCAGTCTCAAGAAACAGGAAGATTCACCGGATGTTGTTTACCTCACGCACCTGACAACACCTGAATCAATCTATCATATGCGGCTTGGTTTGGCCTCACTGGCTTCTAAGCCCCATCACCATGCATCATCAGAGTGGTATAAGTGGTTGCCGTGGCCTGTCACGTTATTGTCGATGATGATCACTTGGATTTATGGCCGTACCTTTGTGGTTGAAAGGAATCGCCTTAATAAATTAAAACTACAGACTTGGGCGATATCCAAATACAATATGCAATACTTCTCGCAGCGGCAAAATGAATCGATCAATCGCTTGATTGAAGAAGCCATACTAGAAGCTGAGGAGAAAGGAGCTAGGGTGATAACTCTAGGTCTCTTGAATCAAGGAGAGGAGCTTAACCGGTACGGTGGGCTCTTCGTGCACAAGAATCCTGAGCTTAAAATAAAGGTAGTGAATGGGAGTAGCTTAGCGGTGGCAGTACTGACAAACAGCATACCCGACGGAACAACCCAAGTGAGAGTTTCCATCATCAGCATATGTAACTTCTCCATCTTCAACATCCTTTTCGACACTAGGATTCTCATGATCTTCCGCATCCTCATCATCGATTTCCTCTTCAACAAGGGCAACCACCCTACGATTAGGAAACTCAGAATTGCTTATGTCACTGCCTTTGCCTTATGCCAAAAGGGAATTCAGGTAGTAACTTTACGTGAGGATGAGCATGAGAAGCTTATTAGATCATTTGGAGGCAAATCTGAAAGTAAGAACTTGCTTGTTTCAAGGAGCTACTGCCAAAAGATATGGTTGGTGGGAAATGGACTGACTGAAGAAGAACAATCTAAGGCAGAAAGAGGAACAACGTTCGTTCCTTTCTCACAGTTCCCACCGGCGAAGAAAAGACGTAAAGACTGTACCTACCACCTTACACCAGCGATGGGCACTCCTGCTACTCTTGGGAATGTCGACTCATGTGAGAATTGGTTACCAAGAAGGGTGATGAGTGCGTGGAGAATTGGGGGGATAGTGCATGCCTTGGGAGGATGGAATGAACACGACTGTGGTTACACCATTTCCAACGTTGACACCATCTGGGACGCTGCTTTTCATCATGGCTTTCTACCTCTCACCATTCTAACTCAATCTTAA
In [19]:
import re
# Demonstrates how a segment header is truncated to its transcript ID.
s = '>Pt1g020530.1.cds.1'
# The dot before "cds" is escaped so only a literal ".cds" matches; the lazy
# quantifier makes the match end at the first ".cds" instead of the last one.
pattern = re.compile(r'>.*?\.cds')
m = pattern.match(s)
# Last expression of the cell: displays '>Pt1g020530.1.cds'.
m.group()
Out[19]:
'>Pt1g020530.1.cds'