PYTHON 10
Ps.py Guest on 20th November 2020 06:09:00 PM
  1. import sys
  2. import re
  3. AminoAcids = ["A","R","N","D","C","Q","E","G","H","I","L","K","M","F","P","S","T","W","Y","V"]
  4.  
  5. def ReadAAIndex(path,featuresname):
  6.     featuresarray = list()
  7.     with open(path) as AAIndexFile:
  8.         for line in iter(AAIndexFile.readline,''):
  9.             if line.startswith("H"):
  10.                 name = line[2:-1]
  11.                 if name in featuresname:
  12.                     temp = line
  13.                     while ( not line.startswith("I")):
  14.                         line = AAIndexFile.readline()
  15.                     line = AAIndexFile.readline()[4:-1] +" "+ AAIndexFile.readline()[4:-1]
  16.                     line = re.sub( '\s+', ' ', line ).strip()
  17.                     temp = line.split(" ")
  18.                     for k in range(len(temp)):
  19.                         if temp[k] == "NA":
  20.                             temp[k] = 0.0
  21.                         temp[k] = float(temp[k])
  22.                     featuresarray.append(temp)
  23.  
  24.     return featuresarray
  25.  
  26. def ComputeSeq(seq, featuresarray, lambdanum, weight):
  27.     result = [0]*(lambdanum + 20)
  28.     for i in range(20):
  29.         for char in seq:
  30.             for char2 in AminoAcids:
  31.                 if char == char2:
  32.                     result[AminoAcids.index(char2)]+=1
  33.     for i in range(len(result)):
  34.         result[i] = result[i]/float(len(seq))
  35.     for lam in range(1, lambdanum + 1):
  36.         for i in range(len(seq) - lambdanum):
  37.             avalinchar = AminoAcids.index(seq[i])
  38.             dovominchar = AminoAcids.index(seq[i+lambdanum])
  39.             for feature in featuresarray:
  40.                 result[lam-1] = result[lam -1] + ( feature[dovominchar] - feature[avalinchar] )**2
  41.             result[lam-1] = result[lam-1]/float(len(featuresarray))
  42.         result[lam-1] = result[lam-1]/(float(len(seq) - lam))
  43.         sumfreq = sum(result[0:20])
  44.         sumpaac = sum(result[20:])
  45.     for i in range(len(result)):
  46.         result[i] = result[i]/float(sumfreq + weight * sumpaac)
  47.     return result
  48.  
  49. def ReadSeq(filename,featuresarray, lambdanum,weight,writingpath):
  50.     with open(writingpath, 'w') as result:
  51.         with open(filename) as fasta:
  52.             sequence = ""
  53.             seqname = ""
  54.             for line in fasta:
  55.                 if line.startswith(">"):
  56.                     if sequence == "":
  57.                         seqname = line[:-1]
  58.                     else:
  59.                         seqname = line[:-1]
  60.                         d=ComputeSeq(sequence,featuresarray,lambdanum, weight)
  61.                         for i in d:
  62.                             result.write(str(i) + ",")
  63.                         sequence = ""
  64.                 else:
  65.                     sequence = sequence + line[:-1]
  66.             d=ComputeSeq(sequence, featuresarray, lambdanum, weight)
  67.             for i in d:
  68.                 result.write(str(i) + ",")
  69. def ReadFeaturesName(path):
  70.     featuresname = ""
  71.     with open(path) as namesfile:
  72.         for line in namesfile:
  73.             line = line.replace("\n", " ")
  74.             line = line.replace("!!!", " ")
  75.             line = re.sub( '\s+', ' ', line ).strip()
  76.             featuresname = featuresname + line
  77.         featuresname = featuresname.split(" ")
  78.         return featuresname
  79. writingpath = "psedu.csv"
  80. featuresname = ReadFeaturesName(r"names.txt")
  81. featuresarray = ReadAAIndex(r"aaindex.txt",featuresname)
  82. ReadSeq(r"seqfile.txt",featuresarray,13, 0.5, writingpath)#landa-w

Paste is for source code and general debugging text.

Login or Register to edit, delete and keep track of your pastes and more.

Raw Paste

Login or Register to edit or fork this paste. It's free.