Comment terminer l’obtention de sous-chaînes d’un génome codant pour une séquence donnée d’acides aminés

Le problème de codage peptidique consiste à rechercher des sous-chaînes d’un génome codant pour une séquence d’acides aminés donnée.

Le code génétique décrit la traduction d’un ARN 3-mer (codon) dans l’un des 20 acides aminés différents. Les trois premiers cercles, se déplaçant de l’intérieur vers l’extérieur, représentent les 1er, 2ème et 3ème nucléotides d’un codon donné. Les quasortingème, cinquième et sixième cercles définissent l’acide aminé traduit de trois manières: son nom complet, son abréviation de 3 lettres et son abréviation d’une lettre. Trois des 64 codons d’ARN total sont des codons d’arrêt, qui arrêtent la traduction et ajoutent implicitement un 21ème symbole d’arrêt à l’alphabet des acides aminés.

entrez la description de l'image ici

Le problème consiste à avoir

Entrée: une chaîne d’ADN Texte et une chaîne d’acides aminés Peptide.

et

Sortie: toutes les sous-chaînes du peptide codant le texte (s’il en existe de telles).

Sample Input: ATGGCCATGGCCCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA MA Sample Output: ATGGCC GGCCAT ATGGCC 

le résultat est obtenu en lisant des sortingplets, ATGGCC code donc MA , GGCCAT inverse serait CCGGTA écrit en arrière, donc ATGGCC (CCGGTA.reverse)

le complément du code génétique est CG, GC, TA et AT

Donc, pour résoudre ce problème, j’utilise d’abord des tableaux déjà définis

 using System; using System.Collections.Generic; using System.Text; using System.Text.RegularExpressions; using System.Linq; using System.IO; using System.Collections; class Program { private static ssortingng[] CODONS = { "TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TGT", "TGC", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG", }; private static ssortingng[] AMINOS_PER_CODON = { "F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "C", "C", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G", }; private static ssortingng[] AMINO_ABBREVIATIONS = { "F", "L", "I", "M", "V", "S", "P", "T", "A", "Y", "H", "Q", "N", "K", "D", "E", "C", "W", "R", "G" }; private static ssortingng[] FULL_NAMES = { "phenylalanine", "leucine", "isoleucine", "methionine", "valine", "serine", "proline", "threonine", "alanine", "tyrosine", "histidine", "glutamine", "asparagine", "lysine", "aspartic acid", "glutamic acid", "cysteine", "tryptophan", "arginine", "glycine" }; static void Main() { //then input is DNA ssortingng dna = ""; //The given peptide ssortingng givenPeptide = "KEVFEPHYY"; char[] givenPeptideArray = givenPeptide.ToCharArray() ; ssortingng codon=""; ssortingng auxCodon=""; ssortingng convertedCodon = ""; ssortingng clean = ""; int pivot = 0; int foundFlag = 0; ssortingng cleanReverse = ""; int foundFlagReverse = 0; ssortingng convertedCodonReverse = ""; //so the idea I was working was to loop dna ssortingng until getting to 3*givenPeptide.Length //because dna is red 3 by 3... //then get givenPeptide.Length and compare each char of given peptide with encoded //letter of scanned sortingplet //so for example first givenpeptide is 'M' so I would search for 'ATG' //then, if found increment a counter and then search for next 'GCC' etc... //however When testing with longer ssortingng I do not get complete result //I have for (int pos = 0; pos < dna.Length - (3*givenPeptide.Length) ; pos++) { pivot = pos; for (int codonPos = 0; codonPos < givenPeptide.Length; codonPos++) { codon = dna.Substring(pivot, 3); auxCodon = givenPeptideArray[codonPos].ToString(); convertedCodon = codonToAminoAcid(codon); convertedCodonReverse = codonToAminoAcid(DNAComplement(codon)); if (auxCodon.Equals(convertedCodon) ) { foundFlag++; clean += codon; } else { foundFlag = 0; clean = ""; } if (foundFlag == givenPeptide.Length) { Console.WriteLine(clean); foundFlag = 0; clean = ""; } //reverse if (auxCodon.Equals(convertedCodonReverse)) { foundFlagReverse++; cleanReverse += codon; } else { foundFlagReverse = 0; cleanReverse = ""; } if (foundFlagReverse == givenPeptide.Length) { Console.WriteLine(cleanReverse); foundFlagReverse = 0; cleanReverse = ""; } pivot+=3; } } }//end main public static string DNAComplement(string dna) { char[] array = dna.ToCharArray(); for (int i = 0; i < array.Length; i++) { char let = array[i]; if (let == 'A') array[i] = 'T'; else if (let == 'T') array[i] = 'A'; else if (let == 'C') array[i] = 'G'; else if (let == 'G') array[i] = 'C'; } Array.Reverse(array); return new string(array); } public static string codonToAminoAcid(String codon) { for (int k = 0; k < CODONS.Length ; k++) { if (CODONS[k].Equals (codon)) { return AMINOS_PER_CODON[k]; } } // never reach here with valid codon return "X"; } }//end class 

Je devrais avoir

 AAGGAAGTATTTGAGCCTCATTATTAC AAAGAGGTGTTTGAACCTCATTACTAT AAGGAGGTATTTGAACCCCACTATTAC AAAGAAGTTTTCGAACCACATTATTAC AAGGAAGTGTTTGAACCTCACTATTAT AAAGAAGTTTTCGAGCCGCACTACTAC AAGGAAGTATTCGAACCACATTACTAT ATAATAATGCGGCTCGAATACTTCCTT GTAGTAATGGGGCTCGAAAACCTCCTT GTAGTAATGAGGTTCAAAAACCTCCTT GTAGTAATGGGGTTCGAAGACTTCCTT ATAATAGTGAGGCTCAAAAACTTCCTT ATAGTAATGGGGTTCGAAGACTTCCTT GTAGTAGTGCGGCTCAAAAACTTCCTT ATAGTAATGAGGTTCGAAAACCTCTTT ATAATAATGTGGCTCGAACACTTCTTT GTAGTAATGGGGCTCAAACACCTCTTT ATAGTAGTGAGGTTCGAAGACTTCCTT GTAATAGTGCGGTTCAAAAACTTCCTT ATAGTAGTGTGGTTCAAATACCTCCTT 

Cependant, je ne reçois que:

 AAGGAAGTATTTGAGCCTCATTATTAC AAAGAGGTGTTTGAACCTCATTACTAT AAGGAAGTGTTTGAACCTCACTATTAT AAAGAAGTTTTCGAGCCGCACTACTAC AAGGAGGTATTTGAACCCCACTATTAC AAAGAAGTTTTCGAACCACATTATTAC 

Je pensais au problème et il semble que le problème est que je ne fais pas une boucle complète à chaque fois …

Il semble que le problème est double. Votre boucle for externe ne va pas assez loin pour attraper la fin de la chaîne d’adn. Deuxièmement, il semble que l’inversion de codons individuels ne fonctionne pas et que vous devez prendre la chaîne de caractères entière (GivenPeptideLength * 3) avant de recevoir le complément. Les modifications suivantes affichent les chaînes que vous avez insérées dans votre exemple. J’espère que le résultat sera tel que vous le souhaitez. Cela a aussi l’avantage d’être beaucoup plus concis.

 using System; using System.Collections.Generic; using System.Text; using System.Text.RegularExpressions; using System.Linq; using System.IO; using System.Collections; namespace DNA { class Program { private static ssortingng[] CODONS = { "TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TGT", "TGC", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG", }; private static ssortingng[] AMINOS_PER_CODON = { "F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "C", "C", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G", }; private static ssortingng[] AMINO_ABBREVIATIONS = { "F", "L", "I", "M", "V", "S", "P", "T", "A", "Y", "H", "Q", "N", "K", "D", "E", "C", "W", "R", "G" }; private static ssortingng[] FULL_NAMES = { "phenylalanine", "leucine", "isoleucine", "methionine", "valine", "serine", "proline", "threonine", "alanine", "tyrosine", "histidine", "glutamine", "asparagine", "lysine", "aspartic acid", "glutamic acid", "cysteine", "tryptophan", "arginine", "glycine" }; public static ssortingng DNAComplement(ssortingng dna) { char[] array = dna.ToCharArray(); for (int i = 0; i < array.Length; i++) { char let = array[i]; if (let == 'A') array[i] = 'T'; else if (let == 'T') array[i] = 'A'; else if (let == 'C') array[i] = 'G'; else if (let == 'G') array[i] = 'C'; } Array.Reverse(array); return new string(array); } public static string StringToAminoAcid(String input) { string result = ""; for (int i = 0; i < input.Length; i += 3) { result += codonToAminoAcid(input.Substring(i, 3)); } return result; } public static string codonToAminoAcid(String codon) { for (int k = 0; k < CODONS.Length; k++) { if (CODONS[k].Equals(codon)) { return AMINOS_PER_CODON[k]; } } // never reach here with valid codon return "X"; } static void Main() { //then input is DNA string dna = ""; //The given peptide string givenPeptide = "KEVFEPHYY"; int resultCount = 0; int candidateLength = givenPeptide.Length * 3; string forward = ""; string backward = ""; for (int pos = 0; pos < dna.Length - candidateLength + 1; pos++) // Added the "+ 1" { forward = dna.Substring(pos, candidateLength); backward = DNAComplement(forward); // Unremark to get a glimpse into what is happening... //Console.WriteLine("pos: {0}, forward: {1}, backward: {2}, forwardTranslation: {3}, backwardTranslation: {4}", // pos, forward, backward, StringToAminoAcid(forward), StringToAminoAcid(backward)); if ( (StringToAminoAcid(forward) == givenPeptide) || (StringToAminoAcid(backward) == givenPeptide) ) { resultCount++; Console.WriteLine(String.Format("Result {0,3} at position {1,6}... {2}", resultCount, pos, forward)); } } Console.WriteLine("Done"); Console.ReadLine(); }//end main }//end class }