diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyser.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyser.java index ce7b7b6226d643944011f1ca3a7f0a9a286848d5..8893170752f73c52e36dbc808557f0b81c6a55cb 100644 --- a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyser.java +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyser.java @@ -6,105 +6,110 @@ import java.util.ArrayList; import java.util.List; + @Slf4j @Service public class CpgAnalyser { - private static double MIN_GC_PERCENTAGE; - private static double MIN_OBSERVED_TO_EXPECTED_CPG; - private static int MIN_WINDOW_SIZE; - private static int MIN_ISLAND_MERGE_GAP; - private static char FIRST_NUCLEOTIDE; - private static char SECOND_NUCLEOTIDE; + private double MIN_GC_PERCENTAGE = 0.5; + private double MIN_OBSERVED_TO_EXPECTED_CPG = 0.6; + private char FIRST_NUCLEOTIDE = 'C'; + private char SECOND_NUCLEOTIDE = 'G'; + private int MIN_WINDOW_SIZE = 200; + private double MIN_ISLAND_MERGE_GAP = 100; - private static boolean findIsland1BpShifts(CpgWindow window) { - while(true) { - if (window.isIsland()) return true; - if (!window.expandRight()) return false; - window.shrinkLeft(); + public static boolean find_island_1bp_shifts(CpgWindow window) { + while (true) { + if (window.is_island()) { + return true; + } + if (!window.expand_right()) { + return false; + } + window.shrink_left(); } } - private static boolean twoFoldShrinking(CpgWindow window) { - while(!window.isIsland()) { - window.shrinkRight(); - window.shrinkLeft(); - window.windowSize = window.windowSize - 2; - if (window.windowSize < MIN_WINDOW_SIZE) return false; + public boolean two_fold_shrinking(CpgWindow window) { + while (!window.is_island()) { + window.shrink_right(); + window.shrink_left(); + window.window_size -= 2; + if (window.window_size < MIN_WINDOW_SIZE) { + return false; + } } return true; } - private static ArrayList<CpgWindow> mergeIslands(ArrayList<CpgWindow> foundIslands) { - ArrayList<CpgWindow> mergedIslands = new ArrayList<>(); - mergedIslands.add(foundIslands.get(0)); - if (foundIslands.size() >= 2) { - for (CpgWindow foundIsland : foundIslands.subList(1, foundIslands.size())) { - if (foundIsland.windowBegin - mergedIslands.get(mergedIslands.size() - 1).windowEnd < MIN_ISLAND_MERGE_GAP) { - mergedIslands.get(mergedIslands.size() - 1).joinWindows(foundIsland); + public List<CpgWindow> merge_islands(List<CpgWindow> found_islands) { + List<CpgWindow> merged_islands = new ArrayList<>(); + merged_islands.add(found_islands.get(0)); + if (found_islands.size() >= 2) { + for (int i = 1; i < found_islands.size(); i++) { + CpgWindow found_island = found_islands.get(i); + if (found_island.window_begin - merged_islands.get(merged_islands.size() - 1).window_end < MIN_ISLAND_MERGE_GAP) { + merged_islands.get(merged_islands.size() - 1).join_window(found_island); } else { - mergedIslands.add(foundIsland); + merged_islands.add(found_island); } } } - return mergedIslands; + return merged_islands; } - private static void extendIslandWindowShifts(CpgWindow window) { - while(true) { - boolean extended = window.expandRightIsland(); - if (!extended) break; + public void extend_island_window_shifts(CpgWindow window) { + while (true) { + boolean extended = window.expand_right_island(); + if (!extended) { + break; + } } - window.updateGcCount(); + window.update_gc_count(); } - private static ArrayList<CpgWindow> findIslands(String sequence) { - if (sequence.length() < MIN_WINDOW_SIZE) { - return new ArrayList<>(); - } - ArrayList<CpgWindow> foundIslands = new ArrayList<>(); - int recordPosition = 0; - while(true) { - CpgWindow window = new CpgWindow(sequence, recordPosition, MIN_WINDOW_SIZE, MIN_GC_PERCENTAGE, MIN_OBSERVED_TO_EXPECTED_CPG, MIN_WINDOW_SIZE, FIRST_NUCLEOTIDE, SECOND_NUCLEOTIDE); - // step 1 find window that is an island - boolean recordContinues = findIsland1BpShifts(window); - if (!recordContinues) break; - - // step 2 window-length shifts - extendIslandWindowShifts(window); - - // step 3 shift the last window by 1bp until it meets the criteria - window.rollbackUntilIsland(); + public List<CpgWindow> find_islands(String record) { + List<CpgWindow> found_islands = new ArrayList<>(); + int record_position = 0; + while (true) { + CpgWindow window = new CpgWindow(record, record_position, MIN_WINDOW_SIZE, MIN_WINDOW_SIZE, MIN_GC_PERCENTAGE, MIN_OBSERVED_TO_EXPECTED_CPG, FIRST_NUCLEOTIDE, SECOND_NUCLEOTIDE); - // step 4 shrink the whole island by 1bp until it meets the criteria - if(!twoFoldShrinking(window)){ - recordPosition = window.windowBegin + 1; + boolean record_continues = find_island_1bp_shifts(window); + if (!record_continues) { + break; + } + extend_island_window_shifts(window); + window.rollback_until_island(); + if (!two_fold_shrinking(window)) { + record_position = window.window_begin + 1; continue; } - foundIslands.add(window); - recordPosition = window.windowEnd; - if (recordPosition + MIN_WINDOW_SIZE >= sequence.length()) break; + found_islands.add(window); + record_position = window.window_end; } - // step 5 at the end merge islands that are at least 100bp apart - if (foundIslands.isEmpty()) return foundIslands; - return mergeIslands(foundIslands); + + if (found_islands.isEmpty()) { + return new ArrayList<>(); + } + return merge_islands(found_islands); } private void initialize(int minWindowSize, double minGcPercentage, double minObservedToExpectedCpG, int minIslandMergeGap, char firstNucleotide, char secondNucleotide) { - MIN_GC_PERCENTAGE = minGcPercentage; - MIN_OBSERVED_TO_EXPECTED_CPG = minObservedToExpectedCpG; - MIN_WINDOW_SIZE = minWindowSize; - MIN_ISLAND_MERGE_GAP = minIslandMergeGap; - FIRST_NUCLEOTIDE = firstNucleotide; - SECOND_NUCLEOTIDE = secondNucleotide; + this.MIN_WINDOW_SIZE = minWindowSize; + this.MIN_GC_PERCENTAGE = minGcPercentage; + this.MIN_OBSERVED_TO_EXPECTED_CPG = minObservedToExpectedCpG; + this.MIN_ISLAND_MERGE_GAP = minIslandMergeGap; + this.FIRST_NUCLEOTIDE = firstNucleotide; + this.SECOND_NUCLEOTIDE = secondNucleotide; + } public ArrayList<CpgAnalyseResult> getResults(String sequence, int minWindowSize, double minGcPercentage, double minObservedToExpectedCpG, int minIslandMergeGap, char firstNucleotide, char secondNucleotide) { initialize(minWindowSize, minGcPercentage, minObservedToExpectedCpG, minIslandMergeGap, firstNucleotide, secondNucleotide); - ArrayList<CpgWindow> islands = findIslands(sequence); + List<CpgWindow> islands = find_islands(sequence); ArrayList<CpgAnalyseResult> results = new ArrayList<>(); - for (CpgWindow island : islands) { - results.add(new CpgAnalyseResult(null, island.windowBegin, island.windowEnd, island.record.substring(island.windowBegin, island.windowEnd), island.getGcPerc(), island.getObsExp(), island.windowSize)); + for(CpgWindow island : islands) { + results.add(new CpgAnalyseResult(null, island.window_begin, island.window_end, island.record.substring(island.window_begin, island.window_begin + 20) + "...", island.getGcPerc(), island.getObsExp(), island.window_size)); } return results; } diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgController.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgController.java index b32d3d0d22dd20c1140498219bf9e0c37bee8d7f..f29228e3b3e2534c8fdfe1bd660a4ba4712ff6f5 100644 --- a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgController.java +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgController.java @@ -3,8 +3,11 @@ package cz.mendelu.dnaAnalyser.analyse.cpg; import cz.mendelu.dnaAnalyser.analyse.zdna.ZdnaAnalyseResult; import cz.mendelu.dnaAnalyser.jwt.JwtAuthenticationUtils; import cz.mendelu.dnaAnalyser.jwt.JwtTokenService; +import cz.mendelu.dnaAnalyser.sequence.Sequence; import cz.mendelu.dnaAnalyser.sequence.SequenceService; import cz.mendelu.dnaAnalyser.exporter.ExporterService; +import cz.mendelu.dnaAnalyser.sequence.data.SequenceData; +import cz.mendelu.dnaAnalyser.sequence.data.SequenceDataRepository; import cz.mendelu.dnaAnalyser.utils.analyse.service.heatmap.Heatmap; import cz.mendelu.dnaAnalyser.utils.controller.DeleteMapping; import cz.mendelu.dnaAnalyser.utils.controller.GetMapping; @@ -77,6 +80,9 @@ public class CpgController { @Autowired private JwtTokenService jwtTokenService; + @Autowired + private SequenceDataRepository sequenceDataRepository; + @Autowired private CpgService cpgService; @@ -100,6 +106,38 @@ public class CpgController { return Response.response(cpg); } + @GetMapping("/{id}/substring") + @ResponseStatus(HttpStatus.OK) + @PreAuthorize("isAuthenticated()") + @ApiOperation("Get a substring of the sequence by ID, start, and end positions.") + public ResponseEntity<String> getSequenceSubstring( + @PathVariable UUID id, + @RequestParam("start") Integer start, + @RequestParam("end") Integer end) { + + // Fetch the sequence using the provided ID. You might need to adjust this code + // based on how your SequenceService is implemented and how sequences are stored. + Sequence sequence = sequenceService.findOne(id); + if (sequence == null) { + return new ResponseEntity<>("Sequence not found", HttpStatus.NOT_FOUND); + } + + + String sequence_string = sequenceDataRepository.load(sequence).toPlain(); + + // Check if start and end positions are within the sequence length + if (start < 0 || end > sequence_string.length() || start >= end) { + return new ResponseEntity<>("Invalid start or end position", HttpStatus.BAD_REQUEST); + } + + // Extract the substring based on the provided start and end positions + String substring = sequence_string.substring(start, end); + + // Return the substring + return new ResponseEntity<>(substring, HttpStatus.OK); + } + + @GetMapping("/{id}/analysis") @ResponseStatus(HttpStatus.OK) @PreAuthorize("isAuthenticated()") @@ -115,8 +153,13 @@ public class CpgController { @ResponseStatus(HttpStatus.OK) @PreAuthorize("isAuthenticated()") @ApiOperation("Get CpG islands analysis by ID.") - public ResponsePage<CpgAnalyseResult> getCpgList(@PathVariable UUID id) { - Page<CpgAnalyseResult> cpgPage = cpgService.returnCpgAnalysis(id); + public ResponsePage<CpgAnalyseResult> getCpgList(@PathVariable UUID id, + Integer sequenceStart, + Integer sequenceLength, + PaginationParam paginationParam, + SortParam sortParam) { + Pageable pageable = paginationParam.toPageable(sortParam.toSort()); + Page<CpgAnalyseResult> cpgPage = cpgService.returnCpgAnalysis(id, sequenceStart, sequenceLength, pageable); return ResponsePage.responsePage(cpgPage); } diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgMapper.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgMapper.java index 66cca0124e2499e582a15c9ac3cd7fc5db960b3c..30ce4e7808bd20fe1479ef373a31626854981ca4 100644 --- a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgMapper.java +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgMapper.java @@ -25,6 +25,11 @@ public interface CpgMapper extends DataLocalisedMapper<CpgAnalyseResult> { @Select("SELECT count(*) FROM CPG") int countAll(); + @Select("SELECT count(*) FROM CPG WHERE POSITION >= #{startPosition} AND POSITION < #{endPosition}") + int countAllBetweenPosition( + @Param("startPosition") int startPosition, + @Param("endPosition") int endPosition); + @Select("SELECT * FROM CPG") @Results(value = { @Result(property = "id", column = "ID"), @@ -99,6 +104,22 @@ public interface CpgMapper extends DataLocalisedMapper<CpgAnalyseResult> { @Param("orderName") String orderName, RowBounds rowBounds); + @Select("SELECT * FROM CPG WHERE POSITION >= #{startPosition} AND POSITION < #{endPosition} ORDER BY ${orderName} DESC") + @Results(value = { + @Result(property = "id", column = "ID"), + @Result(property = "position", column = "position"), + @Result(property = "end", column = "end"), + @Result(property = "sequence", column = "sequence"), + @Result(property = "gcPerc", column = "gcPerc"), + @Result(property = "observedToExpectedCpG", column = "observedToExpectedCpG"), + @Result(property = "length", column = "length") + }) + List<CpgAnalyseResult> getAllBetweenPositionOrderDesc( + @Param("startPosition") int startPosition, + @Param("endPosition") int endPosition, + @Param("orderName") String orderName, + RowBounds rowBounds); + @Delete("DELETE from CPG WHERE ID = #{id}") void delete(int id); diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgService.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgService.java index 895d24c8e6215b18440a6feff5b1c1d7b38fb2fd..6c012e81d2be1a792cd7ada277ddb736deaf7894 100644 --- a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgService.java +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgService.java @@ -87,15 +87,37 @@ public class CpgService extends ModelService<Cpg> { return cpg; } - public Page<CpgAnalyseResult> returnCpgAnalysis(UUID id) { + public Page<CpgAnalyseResult> returnCpgAnalysis(UUID id, Integer sequenceStart, Integer sequenceLength, Pageable pageable) { Cpg analysis = findOne(id); + + int start = (sequenceStart == null) ? 0 : sequenceStart; + int end = start + ((sequenceLength == null) ? analysis.getSequence().getLength() : sequenceLength); + try (SqlSession sqlSession = cpgDataSessionService.openDataSession(analysis)) { CpgMapper cpgMapper = sqlSession.getMapper(CpgMapper.class); - Page<CpgAnalyseResult> cpgPage = new PageImpl<>(cpgMapper.getAll()); - return cpgPage; + + RowBounds rowBounds = new RowBounds((int) pageable.getOffset(), pageable.getPageSize()); + String orderName = "POSITION"; + Sort.Direction orderDirection = Sort.Direction.ASC; + Sort.Order order = getSortOrderOrNull(pageable); + if (order != null) { + orderName = order.getProperty().toUpperCase(); + orderDirection = order.getDirection(); + } + + List<CpgAnalyseResult> list; + if (orderDirection == Sort.Direction.ASC) { + list = cpgMapper.getAllBetweenPositionOrderAsc(start, end, orderName, rowBounds); + } else { + list = cpgMapper.getAllBetweenPositionOrderDesc(start, end, orderName, rowBounds); + } + int total = cpgMapper.countAllBetweenPosition(start, end); + + return new PageImpl<>(list, pageable, total); } } + public Heatmap getHeatmap(UUID id, Integer segmentsCount, Integer from, Integer to) { Cpg analysis = findOne(id); return cpgDataSessionService.heatmap(CpgMapper.class, analysis, segmentsCount, from, to); diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgWindow.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgWindow.java index 479324c62b766eb9c2f4b5f7979b824a401c74ae..7c0a2a78ad36f30b4b80bdfff0322326f1f850bd 100644 --- a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgWindow.java +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgWindow.java @@ -1,171 +1,162 @@ package cz.mendelu.dnaAnalyser.analyse.cpg; - -import lombok.Getter; +import java.util.regex.Pattern; public class CpgWindow { - public final String record; - private final int recordLen; - public int windowBegin; - public int windowSize; - public int windowEnd; - private int gcCount; - private double obsCpg; - @Getter - private double obsExp; - @Getter - private double gcPerc; - private final double MIN_GC_PERCENTAGE; - private final double MIN_OBSERVED_TO_EXPECTED_CPG; + private final Pattern patternGorC; private final int MIN_WINDOW_SIZE; - private final char FIRST_NUCLEOTIDE; private final char SECOND_NUCLEOTIDE; - - public CpgWindow( String sequence, - int windowBegin, - int windowSize, - double MIN_GC_PERCENTAGE, - double MIN_OBSERVED_TO_EXPECTED_CPG, - int MIN_WINDOW_SIZE, - char FIRST_NUCLEOTIDE, - char SECOND_NUCLEOTIDE) { - - this.record = sequence; - this.windowBegin = windowBegin; - this.windowSize = windowSize; - this.recordLen = sequence.length(); - this.windowEnd = windowBegin + windowSize; - this.gcCount = 0; - this.obsCpg = 0.0; - this.obsExp = 0.0; - this.gcPerc = 0.0; + private final char FIRST_NUCLEOTIDE; + private final double MIN_OBSERVED_TO_EXPECTED_CPG; + private final double MIN_GC_PERCENTAGE; + public String record; + private final int record_len; + public int window_begin; + public int window_size; + public int window_end; + private long gc_count; + private int obs_cpg; + private double gc_perc; + private double obs_exp; + + + + public CpgWindow(String record, int window_begin, int window_size, int MIN_WINDOW_SIZE, double MIN_GC_PERCENTAGE, double MIN_OBSERVED_TO_EXPECTED_CPG, char FIRST_NUCLEOTIDE, char SECOND_NUCLEOTIDE) { + this.record = record; + this.patternGorC = Pattern.compile("(?=(" + FIRST_NUCLEOTIDE + "|" + SECOND_NUCLEOTIDE + "))"); + this.record_len = record.length(); + this.window_begin = window_begin; + this.window_size = window_size; + this.window_end = window_begin + window_size; + this.gc_count = 0; + this.obs_cpg = 0; + this.gc_perc = 0; + this.obs_exp = 0; this.MIN_GC_PERCENTAGE = MIN_GC_PERCENTAGE; this.MIN_OBSERVED_TO_EXPECTED_CPG = MIN_OBSERVED_TO_EXPECTED_CPG; - this.MIN_WINDOW_SIZE = MIN_WINDOW_SIZE; this.FIRST_NUCLEOTIDE = FIRST_NUCLEOTIDE; this.SECOND_NUCLEOTIDE = SECOND_NUCLEOTIDE; - this.updateGcCount(); - + this.MIN_WINDOW_SIZE = MIN_WINDOW_SIZE; + update_gc_count(); } - public String toString() { - return "Window at (" + this.windowBegin + ", " + this.windowEnd + ")\tgcper:" + this.gcPerc + "\tobs_exp:" + this.obsExp + "\twin_length:" + this.windowSize; + return "Window at (" + window_begin + ", " + window_end + ")\tgcper:" + gc_perc + "\tobs_exp:" + obs_exp + "\twin_length:" + window_size; + } + + public boolean is_island() { + _evaluate(); + return gc_perc > MIN_GC_PERCENTAGE && obs_exp > MIN_OBSERVED_TO_EXPECTED_CPG; } - private void evaluate() { - this.gcPerc = (double) this.gcCount / this.windowSize; - double expCpg = Math.pow(((double) this.gcCount / 2), 2) / this.windowSize; + private void _evaluate() { + gc_perc = gc_count / (double) window_size; + double exp_cpg = Math.pow((double) gc_count / 2, 2) / window_size; try { - this.obsExp = this.obsCpg / expCpg; + obs_exp = obs_cpg / exp_cpg; } catch (ArithmeticException e) { - this.obsExp = 0.0; + obs_exp = 0.0; } } - public boolean isIsland() { - this.evaluate(); - return this.gcPerc > MIN_GC_PERCENTAGE && this.obsExp > MIN_OBSERVED_TO_EXPECTED_CPG; - } - - private int count(String seq, String subsequence) { - int count = 0; - for (int i = 0; i < seq.length(); i++) { - if (seq.charAt(i) == subsequence.charAt(0)) { - if (seq.startsWith(subsequence, i)) { - count++; - } - } + public void update_gc_count() { + if (this.window_end > record_len) { + this.window_end = record_len; } - return count; - - } - - public void updateGcCount() { - String subsequence = this.record.substring(this.windowBegin, this.windowEnd); - this.gcCount = this.count(subsequence, String.valueOf(FIRST_NUCLEOTIDE)) + count(subsequence, String.valueOf(SECOND_NUCLEOTIDE)); - this.obsCpg = this.count(subsequence,String.valueOf(FIRST_NUCLEOTIDE) + String.valueOf(SECOND_NUCLEOTIDE)); - this.evaluate(); + String record_cut = record.substring(window_begin, window_end); + gc_count = this.patternGorC.matcher(record_cut).results().count(); + obs_cpg = countOccurrences(record_cut, FIRST_NUCLEOTIDE + String.valueOf(SECOND_NUCLEOTIDE)); + _evaluate(); } - public void shrinkLeft() { - // check if window begin is First or second nucleotide - if (charAt(this.record, this.windowBegin) == FIRST_NUCLEOTIDE || charAt(this.record, this.windowBegin) == SECOND_NUCLEOTIDE) { - this.gcCount--; + public void shrink_left() { + if (charAt(record, window_begin) == FIRST_NUCLEOTIDE || charAt(record, window_begin) == SECOND_NUCLEOTIDE) { + gc_count -= 1; } - if (charAt(this.record, this.windowBegin) == FIRST_NUCLEOTIDE && charAt(this.record, this.windowBegin + 1) == SECOND_NUCLEOTIDE) { - this.obsCpg--; + if (charAt(record, window_begin) == FIRST_NUCLEOTIDE && charAt(record,window_begin + 1) == SECOND_NUCLEOTIDE) { + obs_cpg -= 1; } - this.windowBegin++; + window_begin += 1; } - - public boolean expandRight() { - if (this.windowEnd >= this.recordLen) { + public boolean expand_right() { + if (window_end >= record_len) { return false; } - if (charAt(this.record, this.windowEnd) == FIRST_NUCLEOTIDE || charAt(this.record, this.windowEnd) == SECOND_NUCLEOTIDE) { - this.gcCount++; + if (charAt(record, window_end) == FIRST_NUCLEOTIDE || charAt(record, window_end) == SECOND_NUCLEOTIDE) { + gc_count += 1; } - if (charAt(this.record, this.windowEnd - 1) == FIRST_NUCLEOTIDE && charAt(this.record, this.windowEnd) == SECOND_NUCLEOTIDE) { - this.obsCpg++; + if (charAt(record,window_end - 1) == FIRST_NUCLEOTIDE && charAt(record,window_end) == SECOND_NUCLEOTIDE) { + obs_cpg += 1; } - this.windowEnd++; + window_end += 1; return true; } - - public void shrinkRight() { - if (charAt(this.record, this.windowEnd - 1) == FIRST_NUCLEOTIDE || charAt(this.record, this.windowEnd - 1) == SECOND_NUCLEOTIDE) { - this.gcCount--; + public void shrink_right() { + if (charAt(record,window_end - 1) == FIRST_NUCLEOTIDE || charAt(record, window_end - 1) == SECOND_NUCLEOTIDE) { + gc_count -= 1; } - if (charAt(this.record, this.windowEnd - 2) == FIRST_NUCLEOTIDE && charAt(this.record, this.windowEnd - 1) == SECOND_NUCLEOTIDE) { - this.obsCpg--; + if (charAt(record, window_end - 2) == FIRST_NUCLEOTIDE && charAt(record,window_end - 1) == SECOND_NUCLEOTIDE) { + obs_cpg -= 1; } - this.windowEnd--; + window_end -= 1; } - private void expandLeft() { - if (charAt(this.record, this.windowBegin - 1) == FIRST_NUCLEOTIDE || charAt(this.record, this.windowBegin - 1) == SECOND_NUCLEOTIDE) { - this.gcCount++; + private void expand_left() { + if (charAt(record,window_begin - 1) == FIRST_NUCLEOTIDE || charAt(record,window_begin - 1) == SECOND_NUCLEOTIDE) { + gc_count += 1; } - if (charAt(this.record, this.windowBegin - 2) == FIRST_NUCLEOTIDE && charAt(this.record, this.windowBegin - 1) == SECOND_NUCLEOTIDE) { - this.obsCpg++; + if (charAt(record, window_begin - 1) == FIRST_NUCLEOTIDE && charAt(record, window_begin) == SECOND_NUCLEOTIDE) { + obs_cpg += 1; } - this.windowBegin--; + window_begin -= 1; } - public boolean expandRightIsland() { - int remaining = this.recordLen - this.windowEnd; - int newWindowBegin; - if (remaining == 0) { + public boolean expand_right_island() { + int remaining_window_length = record_len - window_end; + if (remaining_window_length == 0) { return false; } - if (remaining < MIN_WINDOW_SIZE) { - newWindowBegin = this.recordLen - MIN_WINDOW_SIZE; + int new_window_begin; + if (remaining_window_length < MIN_WINDOW_SIZE) { + System.out.println("reached end of sequence in big jump, be careful"); + new_window_begin = record_len - MIN_WINDOW_SIZE; } else { - newWindowBegin = this.windowEnd; + new_window_begin = window_end; + } + CpgWindow next_window = new CpgWindow(record, new_window_begin, MIN_WINDOW_SIZE, MIN_WINDOW_SIZE, MIN_GC_PERCENTAGE, MIN_OBSERVED_TO_EXPECTED_CPG, FIRST_NUCLEOTIDE, SECOND_NUCLEOTIDE); + window_end = new_window_begin + MIN_WINDOW_SIZE; + window_size = window_end - window_begin; + return next_window.is_island(); + } + + public void rollback_until_island() { + CpgWindow last_window = new CpgWindow(record, window_end - MIN_WINDOW_SIZE, MIN_WINDOW_SIZE, MIN_WINDOW_SIZE, MIN_GC_PERCENTAGE, MIN_OBSERVED_TO_EXPECTED_CPG, FIRST_NUCLEOTIDE, SECOND_NUCLEOTIDE); + while (!last_window.is_island()) { + last_window.expand_left(); + last_window.shrink_right(); } - CpgWindow newWindow = new CpgWindow(this.record, newWindowBegin, this.MIN_WINDOW_SIZE, this.MIN_GC_PERCENTAGE, this.MIN_OBSERVED_TO_EXPECTED_CPG, this.MIN_WINDOW_SIZE, this.FIRST_NUCLEOTIDE, this.SECOND_NUCLEOTIDE); - this.windowEnd = newWindowBegin + this.MIN_WINDOW_SIZE; - this.windowSize = this.windowEnd - this.windowBegin; - return newWindow.isIsland(); + join_window(last_window); } - public void rollbackUntilIsland() { - CpgWindow lastWindow = new CpgWindow(this.record, this.windowEnd - this.MIN_WINDOW_SIZE, this.MIN_WINDOW_SIZE, this.MIN_GC_PERCENTAGE, this.MIN_OBSERVED_TO_EXPECTED_CPG, this.MIN_WINDOW_SIZE, this.FIRST_NUCLEOTIDE, this.SECOND_NUCLEOTIDE); - while (!lastWindow.isIsland()) { - lastWindow.expandLeft(); - lastWindow.shrinkRight(); + public void join_window(CpgWindow other_window) { + if (other_window.window_end > this.window_begin) { + window_end = other_window.window_end; + window_size = window_end - window_begin; + update_gc_count(); } - this.joinWindows(lastWindow); } - public void joinWindows(CpgWindow lastWindow) { - this.windowEnd = lastWindow.windowEnd; - this.windowSize = this.windowEnd - this.windowBegin; - this.updateGcCount(); + private static int countOccurrences(String str, String sub) { + int count = 0; + int idx = 0; + while ((idx = str.indexOf(sub, idx)) != -1) { + count++; + idx += sub.length(); + } + return count; } public static char charAt(String str, int index) { @@ -185,4 +176,12 @@ public class CpgWindow { return str.charAt(index); } + + public double getObsExp() { + return obs_exp; + } + + public double getGcPerc() { + return gc_perc; + } }