From b2b778b792501ceed449105f9aac2c6d45bdd1ae Mon Sep 17 00:00:00 2001 From: Teddy Warner Date: Mon, 13 Mar 2023 17:11:09 -0400 Subject: [PATCH 1/5] add findSurfaceResiduesListCharged.py --- findSurfaceResiduesListCharged.py | 195 ++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 findSurfaceResiduesListCharged.py diff --git a/findSurfaceResiduesListCharged.py b/findSurfaceResiduesListCharged.py new file mode 100644 index 00000000..de37224d --- /dev/null +++ b/findSurfaceResiduesListCharged.py @@ -0,0 +1,195 @@ +''' +http://pymolwiki.org/index.php/FindSurfaceResiduesListCharged +''' + +from __future__ import print_function +from pymol import cmd + + +def findSurfaceAtoms(selection="all",cutoff=2.5, quiet=1): + """ +DESCRIPTION + + Finds those atoms on the surface of a protein + that have at least 'cutoff' exposed A**2 surface area. + +USAGE + + findSurfaceAtoms [ selection, [ cutoff ]] + +SEE ALSO + + findSurfaceResidues + """ + cutoff, quiet = float(cutoff), int(quiet) + + tmpObj = cmd.get_unused_name("_tmp") + cmd.create(tmpObj, "(" + selection + ") and polymer", zoom=0) + + cmd.set("dot_solvent", 1, tmpObj) + cmd.get_area(selection=tmpObj, load_b=1) + + # threshold on what one considers an "exposed" atom (in A**2): + cmd.remove(tmpObj + " and b < " + str(cutoff)) + + selName = cmd.get_unused_name("exposed_atm_") + cmd.select(selName, "(" + selection + ") in " + tmpObj) + + cmd.delete(tmpObj) + + if not quiet: + print("Exposed atoms are selected in: " + selName) + + return selName + + +def findSurfaceResiduesListCharged(pH=7.0, selection="all", cutoff=2.5, doShow=0, quiet=1): + """ +DESCRIPTION + + Identifies and lists all charged surface residues. Also calculates a + surface charge at entered pH. 
+ +USAGE + + findSurfaceResiduesListCharged [pH, [selection ,[cutoff ,[ doShow , [ quiet]]]]]] + +ARGUMENTS + + pH = The pH value to estimate a surface charge at + + selection = string: object or selection in which to find exposed + residues {default: all} + + cutoff = float: cutoff of what is exposed or not {default: 2.5 Ang**2} + +RETURNS + + (list: (chain, resv ) ) + A Python list of residue numbers corresponding + to those residues w/more exposure than the cutoff. + + A printout of all charged amino acids and the estimated surface charge of a protein. + + """ + cutoff, doShow, quiet = float(cutoff), int(doShow), int(quiet) + + selName = findSurfaceAtoms(selection, cutoff, quiet) + + exposed = set() + cmd.iterate(selName, "exposed.add((chain,resv))", space=locals()) + + selNameRes = cmd.get_unused_name("exposed_res_") + cmd.select(selNameRes, "byres " + selName) + + if not quiet: + print("Exposed residues are selected in: " + selNameRes) + + if doShow: + cmd.show_as("spheres", "(" + selection + ") and polymer") + cmd.color("white", selection) + cmd.color("yellow", selNameRes) + cmd.color("red", selName) + exposed=sorted(exposed) #list of exposed residues + seq=cmd.get_fastastr('all') + newseq = seq.replace('\n', ' ').replace('\r', '').replace(' ','').replace("?","") + nDomains=newseq.count(">") + + def splitSeq(seq,d): #splits residues into their domains + domain=-1 + count=0 + splits=[""]*d + for r in seq: + if r==">": + domain+=1 + count=7 + if count>0: + count-=1 + continue + if r=="\n": + continue + splits[domain]+=r + return splits + + splits=splitSeq(newseq,nDomains) #list of all residues separated by domain + + #gets all charged amino acids on the surface + reslist=[[] for x in range(nDomains)] + exposedAms="" + K=0 + R=0 + D=0 + H=0 + E=0 + + for r in exposed: + tempDomain=ord(r[0])-65 + if r[1]-1 >= len(splits[tempDomain]): + continue + amino=splits[tempDomain][r[1]-1] + if amino not in "KRDHE": + continue + elif amino=='K': + K+=1 + elif amino=='R': + 
R+=1 + elif amino=='D': + D+=1 + elif amino=="H": + H+=1 + elif amino=='E': + E+=1 + exposedAms+=amino + amPos=amino+str(r[1]) + reslist[tempDomain].append(amPos) + + n=1 + for dom in reslist: + print("Domain " + str(n)+" charged residues:") + print(dom) + n+=1 + + print("Charged residue list") + print(exposedAms) + print("Number of exposed charged residues") + print(len(exposedAms)) + + if pH > 10.54: + kCharge=10 ** -(pH-10.54) + else: + kCharge=1-(10** (pH-10.54)) + + if pH > 12.48: + rCharge=10 ** -(pH-12.48) + else: + rCharge=1-(10** (pH-12.48)) + + if pH > 4.07: + dCharge=-(1-(10**-(pH-4.07))) + else: + dCharge=-(10**(pH-4.07)) + + if pH > 3.90: + eCharge=-(1-(10**-(pH-3.90))) + else: + eCharge=-(10**(pH-3.90)) + + if pH >6.04: + hCharge=10 ** -(pH-6.04) + else: + hCharge=1-(10** (pH-6.04)) + + + charge=kCharge*K+rCharge*R+hCharge*H+dCharge*D+eCharge*E + charge=round(charge,2) + if charge>0: + chargetx="+"+str(charge) + else: + chargetx=str(charge) + + + print ("The expected surface charge of this protein at pH " + str(pH) +" is: " +chargetx) + return sorted(exposed) + +cmd.extend("findSurfaceAtoms", findSurfaceAtoms) +cmd.extend("findSurfaceResiduesListCharged", findSurfaceResiduesListCharged) From c64efe156142b5dcd3c9bab76f39a592fa90aca4 Mon Sep 17 00:00:00 2001 From: Teddy Warner Date: Mon, 20 Mar 2023 17:52:11 -0400 Subject: [PATCH 2/5] add findSurfaceCharge.py --- findSurfaceCharge.py | 163 +++++++++++++++++++++++++ findSurfaceResiduesListCharged.py | 195 ------------------------------ 2 files changed, 163 insertions(+), 195 deletions(-) create mode 100644 findSurfaceCharge.py delete mode 100644 findSurfaceResiduesListCharged.py diff --git a/findSurfaceCharge.py b/findSurfaceCharge.py new file mode 100644 index 00000000..8a48b4eb --- /dev/null +++ b/findSurfaceCharge.py @@ -0,0 +1,163 @@ +from __future__ import print_function +from pymol import cmd + + +def findSurfaceAtoms(selection="all",cutoff=2.5): + """ + Adapted from Jason Vertrees 
https://pymolwiki.org/index.php/FindSurfaceResidues +DESCRIPTION + + Finds those atoms on the surface of a protein + that have at least 'cutoff' exposed A**2 surface area. + +USAGE + + findSurfaceAtoms [ selection, [ cutoff ]] + +SEE ALSO + + findSurfaceResidues + """ + cutoff = float(cutoff) + + tmpObj = cmd.get_unused_name("_tmp") + cmd.create(tmpObj, "(" + selection + ") and polymer", zoom=0) + + cmd.set("dot_solvent", 1, tmpObj) + cmd.get_area(selection=tmpObj, load_b=1) + + # threshold on what one considers an "exposed" atom (in A**2): + cmd.remove(tmpObj + " and b < " + str(cutoff)) + + selName = cmd.get_unused_name("exposed_atm_") + + cmd.select(selName, "(" + selection + ") in " + tmpObj) + + cmd.delete(tmpObj) + + return selName + + +def findSurfaceCharge(pH=7.0, folded=True, selection="all", cutoff=2.5): + """ +DESCRIPTION + + Calculates a surface charge at entered pH. Also allows for the charge of an unfolded protein to be calculated. + +USAGE + + findSurfaceCharge [pH, [folded, [selection ,[cutoff]]]] + +ARGUMENTS + + pH = The pH value to estimate a surface charge at + + folded = Whether the protein is folded (True) or denatured (False) + + selection = string: object or selection in which to find exposed + residues {default: all} + + cutoff = float: cutoff of what is exposed or not {default: 2.5 Ang**2} + +RETURNS + + A printout of the estimated surface charge at a given pH + + """ + cutoff = float(cutoff) + + selName = findSurfaceAtoms(selection, cutoff) + + exposed = set() + cmd.iterate(selName, "exposed.add((resv))", space=locals()) + cmd.delete(selName) + + selNameRes = cmd.get_unused_name("exposed_res_") + + exposed=sorted(exposed) #list of exposed residues + + seq=cmd.get_fastastr('all') + seqbegin = seq.find('\n') + newSeq = seq[seqbegin::].replace('\n', ' ').replace('\r', '').replace(' ','').replace("?","") + + #adjusts for beginning position + first = set() + allRes = findSurfaceAtoms(selection, 0) + cmd.iterate(allRes, "first.add((resv))", 
space=locals()) + cmd.delete(allRes) + + selNameRes = cmd.get_unused_name("exposed_res_") + + first=sorted(first)[0] #firstRes + #gets all charged amino acids on the surface + reslist= [] + exposedAtms="" + K=0 + R=0 + D=0 + H=0 + E=0 + + if folded: + offset=(1+first) + for r in exposed: + amino=newSeq[r-offset] + if amino not in "KRDHE": + continue + elif amino=='K': + K+=1 + elif amino=='R': + R+=1 + elif amino=='D': + D+=1 + elif amino=="H": + H+=1 + elif amino=='E': + E+=1 + exposedAtms+=amino + chargedAA=amino + reslist.append(chargedAA) + else: + for r in newSeq: + amino=r + if amino not in "KRDHE": + continue + elif amino=='K': + K+=1 + elif amino=='R': + R+=1 + elif amino=='D': + D+=1 + elif amino=="H": + H+=1 + elif amino=='E': + E+=1 + exposedAtms+=amino + chargedAA=amino + reslist.append(chargedAA) + + + kCharge= 1 / (1 + 10 ** (pH - 10.54)) + rCharge= 1 / (1 + 10 ** (pH - 12.48)) + dCharge= -(1 / (1 + 10 ** (4.07 - pH))) + eCharge= -(1 / (1 + 10 ** (3.90 - pH))) + hCharge= 1 / (1 + 10 ** (pH - 6.04)) + + charge=kCharge*K+rCharge*R+hCharge*H+dCharge*D+eCharge*E + charge=round(charge,2) + if charge>0: + chargetx="+"+str(charge) + else: + chargetx=str(charge) + + if folded: + print ("Exposed charged residues: " +str(exposedAtms)) + print ("The expected surface charge of this protein at pH " + str(pH) +" is: " +chargetx) + + else: + print ("Charged residues: "+str(exposedAtms)) + print ("The expected charge of this denatured protein at pH " +str(pH) +" is: " +chargetx) + + +cmd.extend("findSurfaceAtoms", findSurfaceAtoms) +cmd.extend("findSurfaceCharge", findSurfaceCharge) diff --git a/findSurfaceResiduesListCharged.py b/findSurfaceResiduesListCharged.py deleted file mode 100644 index de37224d..00000000 --- a/findSurfaceResiduesListCharged.py +++ /dev/null @@ -1,195 +0,0 @@ -''' -http://pymolwiki.org/index.php/FindSurfaceResiduesListCharged -''' - -from __future__ import print_function -from pymol import cmd - - -def 
findSurfaceAtoms(selection="all",cutoff=2.5, quiet=1): - """ -DESCRIPTION - - Finds those atoms on the surface of a protein - that have at least 'cutoff' exposed A**2 surface area. - -USAGE - - findSurfaceAtoms [ selection, [ cutoff ]] - -SEE ALSO - - findSurfaceResidues - """ - cutoff, quiet = float(cutoff), int(quiet) - - tmpObj = cmd.get_unused_name("_tmp") - cmd.create(tmpObj, "(" + selection + ") and polymer", zoom=0) - - cmd.set("dot_solvent", 1, tmpObj) - cmd.get_area(selection=tmpObj, load_b=1) - - # threshold on what one considers an "exposed" atom (in A**2): - cmd.remove(tmpObj + " and b < " + str(cutoff)) - - selName = cmd.get_unused_name("exposed_atm_") - cmd.select(selName, "(" + selection + ") in " + tmpObj) - - cmd.delete(tmpObj) - - if not quiet: - print("Exposed atoms are selected in: " + selName) - - return selName - - -def findSurfaceResiduesListCharged(pH=7.0, selection="all", cutoff=2.5, doShow=0, quiet=1): - """ -DESCRIPTION - - Identifies and lists all charged surface residues. Also calculates a - surface charge at entered pH. - -USAGE - - findSurfaceResiduesListCharged [pH, [selection ,[cutoff ,[ doShow , [ quiet]]]]]] - -ARGUMENTS - - pH = The pH value to estimate a surface charge at - - selection = string: object or selection in which to find exposed - residues {default: all} - - cutoff = float: cutoff of what is exposed or not {default: 2.5 Ang**2} - -RETURNS - - (list: (chain, resv ) ) - A Python list of residue numbers corresponding - to those residues w/more exposure than the cutoff. - - A printout of all charged amino acids and the estimated surface charge of a protein. 
- - """ - cutoff, doShow, quiet = float(cutoff), int(doShow), int(quiet) - - selName = findSurfaceAtoms(selection, cutoff, quiet) - - exposed = set() - cmd.iterate(selName, "exposed.add((chain,resv))", space=locals()) - - selNameRes = cmd.get_unused_name("exposed_res_") - cmd.select(selNameRes, "byres " + selName) - - if not quiet: - print("Exposed residues are selected in: " + selNameRes) - - if doShow: - cmd.show_as("spheres", "(" + selection + ") and polymer") - cmd.color("white", selection) - cmd.color("yellow", selNameRes) - cmd.color("red", selName) - exposed=sorted(exposed) #list of exposed residues - seq=cmd.get_fastastr('all') - newseq = seq.replace('\n', ' ').replace('\r', '').replace(' ','').replace("?","") - nDomains=newseq.count(">") - - def splitSeq(seq,d): #splits residues into their domains - domain=-1 - count=0 - splits=[""]*d - for r in seq: - if r==">": - domain+=1 - count=7 - if count>0: - count-=1 - continue - if r=="\n": - continue - splits[domain]+=r - return splits - - splits=splitSeq(newseq,nDomains) #list of all residues separated by domain - - #gets all charged amino acids on the surface - reslist=[[] for x in range(nDomains)] - exposedAms="" - K=0 - R=0 - D=0 - H=0 - E=0 - - for r in exposed: - tempDomain=ord(r[0])-65 - if r[1]-1 >= len(splits[tempDomain]): - continue - amino=splits[tempDomain][r[1]-1] - if amino not in "KRDHE": - continue - elif amino=='K': - K+=1 - elif amino=='R': - R+=1 - elif amino=='D': - D+=1 - elif amino=="H": - H+=1 - elif amino=='E': - E+=1 - exposedAms+=amino - amPos=amino+str(r[1]) - reslist[tempDomain].append(amPos) - - n=1 - for dom in reslist: - print("Domain " + str(n)+" charged residues:") - print(dom) - n+=1 - - print("Charged residue list") - print(exposedAms) - print("Number of exposed charged residues") - print(len(exposedAms)) - - if pH > 10.54: - kCharge=10 ** -(pH-10.54) - else: - kCharge=1-(10** (pH-10.54)) - - if pH > 12.48: - rCharge=10 ** -(pH-12.48) - else: - rCharge=1-(10** (pH-12.48)) - - 
if pH > 4.07: - dCharge=-(1-(10**-(pH-4.07))) - else: - dCharge=-(10**(pH-4.07)) - - if pH > 3.90: - eCharge=-(1-(10**-(pH-3.90))) - else: - eCharge=-(10**(pH-3.90)) - - if pH >6.04: - hCharge=10 ** -(pH-6.04) - else: - hCharge=1-(10** (pH-6.04)) - - - charge=kCharge*K+rCharge*R+hCharge*H+dCharge*D+eCharge*E - charge=round(charge,2) - if charge>0: - chargetx="+"+str(charge) - else: - chargetx=str(charge) - - - print ("The expected surface charge of this protein at pH " + str(pH) +" is: " +chargetx) - return sorted(exposed) - -cmd.extend("findSurfaceAtoms", findSurfaceAtoms) -cmd.extend("findSurfaceResiduesListCharged", findSurfaceResiduesListCharged) From 5fd98ee59b1d25f2931fd0e787a429a816f09bf0 Mon Sep 17 00:00:00 2001 From: Teddy Warner Date: Fri, 24 Mar 2023 12:06:40 -0400 Subject: [PATCH 3/5] update script --- findSurfaceCharge.py | 151 ++++++++++++++++++++----------------------- 1 file changed, 69 insertions(+), 82 deletions(-) diff --git a/findSurfaceCharge.py b/findSurfaceCharge.py index 8a48b4eb..cfd5cefe 100644 --- a/findSurfaceCharge.py +++ b/findSurfaceCharge.py @@ -38,59 +38,33 @@ def findSurfaceAtoms(selection="all",cutoff=2.5): return selName -def findSurfaceCharge(pH=7.0, folded=True, selection="all", cutoff=2.5): - """ -DESCRIPTION - - Calculates a surface charge at entered pH. Also allows for the charge of an unfolded protein to be calculated. 
- -USAGE - - findSurfaceCharge [pH, [folded, [selection ,[cutoff]]]] - -ARGUMENTS - - pH = The pH value to estimate a surface charge at - - folded = Whether the protein is folded (True) or denatured (False) +def _findSurfaceChargeImpl(selection, pH, folded, cutoff): - selection = string: object or selection in which to find exposed - residues {default: all} - - cutoff = float: cutoff of what is exposed or not {default: 2.5 Ang**2} - -RETURNS - - A printout of the estimated surface charge at a given pH + def get_exposed_residues(selection,cutoff): + cutoff = float(cutoff) - """ - cutoff = float(cutoff) - selName = findSurfaceAtoms(selection, cutoff) + selName = findSurfaceAtoms(selection, cutoff) - exposed = set() - cmd.iterate(selName, "exposed.add((resv))", space=locals()) - cmd.delete(selName) + tempExposed = set() + cmd.iterate(selName, "tempExposed.add((resv,oneletter))", space=locals()) + cmd.delete(selName) - selNameRes = cmd.get_unused_name("exposed_res_") + tempExposed=sorted(tempExposed) #list of exposed residues + exposed=[] + for res in tempExposed: + exposed.append(res[1]+str(res[0])) - exposed=sorted(exposed) #list of exposed residues + return exposed - seq=cmd.get_fastastr('all') - seqbegin = seq.find('\n') - newSeq = seq[seqbegin::].replace('\n', ' ').replace('\r', '').replace(' ','').replace("?","") - - #adjusts for beginning position - first = set() - allRes = findSurfaceAtoms(selection, 0) - cmd.iterate(allRes, "first.add((resv))", space=locals()) - cmd.delete(allRes) + if folded: + exposed = get_exposed_residues(selection,cutoff) + else: + exposed = get_exposed_residues(selection,0) - selNameRes = cmd.get_unused_name("exposed_res_") + pH=float(pH) - first=sorted(first)[0] #firstRes #gets all charged amino acids on the surface - reslist= [] exposedAtms="" K=0 R=0 @@ -98,43 +72,22 @@ def findSurfaceCharge(pH=7.0, folded=True, selection="all", cutoff=2.5): H=0 E=0 - if folded: - offset=(1+first) - for r in exposed: - amino=newSeq[r-offset] - if 
amino not in "KRDHE": - continue - elif amino=='K': - K+=1 - elif amino=='R': - R+=1 - elif amino=='D': - D+=1 - elif amino=="H": - H+=1 - elif amino=='E': - E+=1 - exposedAtms+=amino - chargedAA=amino - reslist.append(chargedAA) - else: - for r in newSeq: - amino=r - if amino not in "KRDHE": - continue - elif amino=='K': - K+=1 - elif amino=='R': - R+=1 - elif amino=='D': - D+=1 - elif amino=="H": - H+=1 - elif amino=='E': - E+=1 - exposedAtms+=amino - chargedAA=amino - reslist.append(chargedAA) + for r in exposed: + amino=r[0] + if amino not in "KRDHE": + continue + elif amino=='K': + K+=1 + elif amino=='R': + R+=1 + elif amino=='D': + D+=1 + elif amino=="H": + H+=1 + elif amino=='E': + E+=1 + exposedAtms+=amino + chargedAA=amino kCharge= 1 / (1 + 10 ** (pH - 10.54)) @@ -152,11 +105,45 @@ def findSurfaceCharge(pH=7.0, folded=True, selection="all", cutoff=2.5): if folded: print ("Exposed charged residues: " +str(exposedAtms)) - print ("The expected surface charge of this protein at pH " + str(pH) +" is: " +chargetx) + print ("The expected surface charge of " + selection +" at pH " + str(pH) +" is: " +chargetx) else: print ("Charged residues: "+str(exposedAtms)) - print ("The expected charge of this denatured protein at pH " +str(pH) +" is: " +chargetx) + print ("The expected charge of denatured " + selection +" at pH " +str(pH) +" is: " +chargetx) + return (selection, chargetx) + + +def findSurfaceCharge(selection="", pH=7.0, folded=True, cutoff=2.5): + """ +DESCRIPTION + + Calculates a surface charge at entered pH. Also allows for the charge of an unfolded protein to be calculated. 
+ +USAGE + + findSurfaceCharge [selection, [pH, [folded, [cutoff]]]] + +ARGUMENTS + + pH = The pH value to estimate a surface charge at + + folded = Whether the protein is folded (True) or denatured (False) + + selection = string: object or selection in which to find exposed + residues {default: first - the first protein loaded in} + + cutoff = float: cutoff of what is exposed or not {default: 2.5 Ang**2} + +RETURNS + + A printout of the estimated surface charge at a given pH + + """ + if not selection: + for obj in cmd.get_names(): + _findSurfaceChargeImpl(obj, pH, folded, cutoff) + else: + _findSurfaceChargeImpl(selection, pH, folded, cutoff) cmd.extend("findSurfaceAtoms", findSurfaceAtoms) From 8979fb86f0f71c7d7f67ec547043e35e5fd7b335 Mon Sep 17 00:00:00 2001 From: Teddy Warner Date: Fri, 24 Mar 2023 12:08:07 -0400 Subject: [PATCH 4/5] update script --- findSurfaceCharge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/findSurfaceCharge.py b/findSurfaceCharge.py index cfd5cefe..e0f1bad7 100644 --- a/findSurfaceCharge.py +++ b/findSurfaceCharge.py @@ -130,7 +130,7 @@ def findSurfaceCharge(selection="", pH=7.0, folded=True, cutoff=2.5): folded = Whether the protein is folded (True) or denatured (False) selection = string: object or selection in which to find exposed - residues {default: first - the first protein loaded in} + residues {default: empty string - all objects} cutoff = float: cutoff of what is exposed or not {default: 2.5 Ang**2} From dcfde523141f13d232ad6d56d98847f2046f414e Mon Sep 17 00:00:00 2001 From: Thomas Holder Date: Mon, 27 Mar 2023 19:14:15 +0200 Subject: [PATCH 5/5] Support multi-chain selections --- findSurfaceCharge.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/findSurfaceCharge.py b/findSurfaceCharge.py index e0f1bad7..db78579e 100644 --- a/findSurfaceCharge.py +++ b/findSurfaceCharge.py @@ -47,13 +47,13 @@ def get_exposed_residues(selection,cutoff): selName = 
findSurfaceAtoms(selection, cutoff) tempExposed = set() - cmd.iterate(selName, "tempExposed.add((resv,oneletter))", space=locals()) + cmd.iterate(selName, "tempExposed.add((model,segi,chain,resv,resi,oneletter))", space=locals()) cmd.delete(selName) tempExposed=sorted(tempExposed) #list of exposed residues exposed=[] for res in tempExposed: - exposed.append(res[1]+str(res[0])) + exposed.append(res[-1] + res[-2]) return exposed @@ -97,11 +97,7 @@ def get_exposed_residues(selection,cutoff): hCharge= 1 / (1 + 10 ** (pH - 6.04)) charge=kCharge*K+rCharge*R+hCharge*H+dCharge*D+eCharge*E - charge=round(charge,2) - if charge>0: - chargetx="+"+str(charge) - else: - chargetx=str(charge) + chargetx = "%+.2f" % (charge) if folded: print ("Exposed charged residues: " +str(exposedAtms))