diff --git a/build.gradle b/build.gradle index 3658aa7baac11bf28bfba29e3bab35565642f9b6..35197726cd0ecd3c22401d2537662d7228ddba61 100644 --- a/build.gradle +++ b/build.gradle @@ -28,7 +28,7 @@ group = 'cz.dnaAnalyser' processResources { include '**' filter ReplaceTokens, tokens: [ - 'gitVersion' : version, + 'gitVersion': '3.7.1 CpG Islands', 'assemblyDate': Long.toString(System.currentTimeMillis()) ] } @@ -73,6 +73,7 @@ dependencies { implementation 'org.apache.commons:commons-email:1.+' implementation 'commons-io:commons-io:2.+' + // Export to CSV implementation'com.opencsv:opencsv:4.0' diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/Cpg.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/Cpg.java new file mode 100644 index 0000000000000000000000000000000000000000..5f290bbfb9ffbd8d79394e72de843e6784c04116 --- /dev/null +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/Cpg.java @@ -0,0 +1,54 @@ +package cz.mendelu.dnaAnalyser.analyse.cpg; + +import cz.mendelu.dnaAnalyser.sequence.Sequence; +import cz.mendelu.dnaAnalyser.user.User; +import cz.mendelu.dnaAnalyser.utils.analyse.model.Analyse; +import cz.mendelu.dnaAnalyser.utils.model.Identifiable; +import lombok.*; + +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Table; +import java.util.Date; +import java.util.Set; +import java.util.UUID; + +@Data +@Entity +@ToString(callSuper = true) +@NoArgsConstructor +@EqualsAndHashCode(callSuper = true) +@Table(name = "cpg_analyses") +public class Cpg extends Analyse implements Identifiable { + + @Column(name = "min_window_size") + private Integer minWindowSize; + + @Column(name = "min_gc_percentage") + private Double minGcPercentage; + + @Column(name = "min_observed_to_expected_cpg") + private Double minObservedToExpectedCpG; + + @Column(name = "min_island_merge_gap") + private Integer minIslandMergeGap; + + @Column(name = "first_nucleotide") + private Character firstNucleotide; + + @Column(name = 
"second_nucleotide") + private Character secondNucleotide; + + @Builder + public Cpg(UUID id, Date created, Sequence sequence, User owner, Set<String> tags, Date finished, Long resultCount, + Integer minWindowSize, Double minGcPercentage, Double minObservedToExpectedCpG, Integer minIslandMergeGap, + Character firstNucleotide, Character secondNucleotide) { + super(id != null ? id : UUID.randomUUID(), created, sequence, owner, tags, finished, sequence.getName(), resultCount); + this.minWindowSize = minWindowSize != null ? minWindowSize : 200; + this.minGcPercentage = minGcPercentage != null ? minGcPercentage : 0.5; + this.minObservedToExpectedCpG = minObservedToExpectedCpG != null ? minObservedToExpectedCpG : 0.6; + this.minIslandMergeGap = minIslandMergeGap != null ? minIslandMergeGap : 100; + this.firstNucleotide = firstNucleotide != null ? firstNucleotide : 'C'; + this.secondNucleotide = secondNucleotide != null ? secondNucleotide : 'G'; + } +} diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyseResult.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyseResult.java new file mode 100644 index 0000000000000000000000000000000000000000..dc44b73bd897835bf451abac6396838a20bce0d7 --- /dev/null +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyseResult.java @@ -0,0 +1,32 @@ +package cz.mendelu.dnaAnalyser.analyse.cpg; + +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.AllArgsConstructor; + +import cz.mendelu.dnaAnalyser.utils.analyse.data.Localised; + +@Data +@NoArgsConstructor +@AllArgsConstructor +public class CpgAnalyseResult implements Localised { + // Window at (0, 799) gcper:0.37922403003754696 obs_exp:5.1172760840440485 win_length:799 + protected Long id; + + protected int position; + protected int end; + + protected String sequence; + + protected double gcPerc; + protected double observedToExpectedCpG; + + protected int length; + + + @Override + public int getMiddle() { + return (this.position + 
this.end) / 2; // Vráti strednĂş pozĂciu CpG ostrova + } + +} diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyser.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyser.java new file mode 100644 index 0000000000000000000000000000000000000000..41c360ff1d0b1a57768e15d1d600532723bb241b --- /dev/null +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgAnalyser.java @@ -0,0 +1,111 @@ +package cz.mendelu.dnaAnalyser.analyse.cpg; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; + + +@Slf4j +@Service +public class CpgAnalyser { + + private static double MIN_GC_PERCENTAGE; + private static double MIN_OBSERVED_TO_EXPECTED_CPG; + private static int MIN_WINDOW_SIZE; + private static int MIN_ISLAND_MERGE_GAP; + private static char FIRST_NUCLEOTIDE; + private static char SECOND_NUCLEOTIDE; + + private static boolean findIsland1BpShifts(CpgWindow window) { + while(true) { + if (window.isIsland()) return true; + if (!window.expandRight()) return false; + window.shrinkLeft(); + } + } + + private static boolean twoFoldShrinking(CpgWindow window) { + while(!window.isIsland()) { + window.shrinkRight(); + window.shrinkLeft(); + window.windowSize = window.windowSize - 2; + if (window.windowSize < MIN_WINDOW_SIZE) return false; + } + return true; + } + + private static ArrayList<CpgWindow> mergeIslands(ArrayList<CpgWindow> foundIslands) { + ArrayList<CpgWindow> mergedIslands = new ArrayList<>(); + mergedIslands.add(foundIslands.get(0)); + if (foundIslands.size() >= 2) { + for (CpgWindow foundIsland : foundIslands.subList(1, foundIslands.size())) { + if (foundIsland.windowBegin - mergedIslands.get(mergedIslands.size() - 1).windowEnd < MIN_ISLAND_MERGE_GAP) { + mergedIslands.get(mergedIslands.size() - 1).joinWindows(foundIsland); + } else { + mergedIslands.add(foundIsland); + } + } + } + return mergedIslands; + } + + private static void 
extendIslandWindowShifts(CpgWindow window) { + while(true) { + boolean extended = window.expandRightIsland(); + if (!extended) break; + } + window.updateGcCount(); + } + + private static ArrayList<CpgWindow> findIslands(String sequence) { + if (sequence.length() < MIN_WINDOW_SIZE) { + return new ArrayList<>(); + } + ArrayList<CpgWindow> foundIslands = new ArrayList<>(); + int recordPosition = 0; + while(true) { + CpgWindow window = new CpgWindow(sequence, recordPosition, MIN_WINDOW_SIZE, MIN_GC_PERCENTAGE, MIN_OBSERVED_TO_EXPECTED_CPG, MIN_WINDOW_SIZE, FIRST_NUCLEOTIDE, SECOND_NUCLEOTIDE); + // step 1 find window that is an island + boolean recordContinues = findIsland1BpShifts(window); + if (!recordContinues) break; + + // step 2 window-length shifts + extendIslandWindowShifts(window); + + // step 3 shift the last window by 1bp until it meets the criteria + window.rollbackUntilIsland(); + + // step 4 shrink the whole island by 1bp until it meets the criteria + if(!twoFoldShrinking(window)){ + recordPosition = window.windowBegin + 1; + continue; + } + foundIslands.add(window); + recordPosition = window.windowEnd; + if (recordPosition + MIN_WINDOW_SIZE >= sequence.length()) break; + } + // step 5 at the end merge islands that are at least 100bp apart + if (foundIslands.isEmpty()) return foundIslands; + return mergeIslands(foundIslands); + } + + private void initialize(int minWindowSize, double minGcPercentage, double minObservedToExpectedCpG, int minIslandMergeGap, char firstNucleotide, char secondNucleotide) { + MIN_GC_PERCENTAGE = minGcPercentage; + MIN_OBSERVED_TO_EXPECTED_CPG = minObservedToExpectedCpG; + MIN_WINDOW_SIZE = minWindowSize; + MIN_ISLAND_MERGE_GAP = minIslandMergeGap; + FIRST_NUCLEOTIDE = firstNucleotide; + SECOND_NUCLEOTIDE = secondNucleotide; + } + + public ArrayList<CpgAnalyseResult> getResults(String sequence, int minWindowSize, double minGcPercentage, double minObservedToExpectedCpG, int minIslandMergeGap, char firstNucleotide, char 
secondNucleotide) { + initialize(minWindowSize, minGcPercentage, minObservedToExpectedCpG, minIslandMergeGap, firstNucleotide, secondNucleotide); + ArrayList<CpgWindow> islands = findIslands(sequence); + ArrayList<CpgAnalyseResult> results = new ArrayList<>(); + for (CpgWindow island : islands) { + results.add(new CpgAnalyseResult(null, island.windowBegin, island.windowEnd, island.record, island.getGcPerc(), island.getObsExp(), island.windowSize)); + } + return results; + } +} diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgController.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgController.java new file mode 100644 index 0000000000000000000000000000000000000000..b32d3d0d22dd20c1140498219bf9e0c37bee8d7f --- /dev/null +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgController.java @@ -0,0 +1,208 @@ +package cz.mendelu.dnaAnalyser.analyse.cpg; + +import cz.mendelu.dnaAnalyser.analyse.zdna.ZdnaAnalyseResult; +import cz.mendelu.dnaAnalyser.jwt.JwtAuthenticationUtils; +import cz.mendelu.dnaAnalyser.jwt.JwtTokenService; +import cz.mendelu.dnaAnalyser.sequence.SequenceService; +import cz.mendelu.dnaAnalyser.exporter.ExporterService; +import cz.mendelu.dnaAnalyser.utils.analyse.service.heatmap.Heatmap; +import cz.mendelu.dnaAnalyser.utils.controller.DeleteMapping; +import cz.mendelu.dnaAnalyser.utils.controller.GetMapping; +import cz.mendelu.dnaAnalyser.utils.controller.*; +import cz.mendelu.dnaAnalyser.utils.controller.PostMapping; +import cz.mendelu.dnaAnalyser.utils.controller.PutMapping; +import cz.mendelu.dnaAnalyser.utils.swagger.ApiPaginationParams; +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import lombok.Builder; +import lombok.Data; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.PageImpl; +import org.springframework.data.domain.Pageable; +import 
org.springframework.http.ContentDisposition; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.security.core.Authentication; +import org.springframework.web.bind.annotation.*; +import springfox.documentation.annotations.ApiIgnore; +import cz.mendelu.dnaAnalyser.utils.analyse.data.ExporterEnums.Analysis; +import cz.mendelu.dnaAnalyser.utils.analyse.data.ExporterEnums.Output; + +import javax.validation.Valid; +import javax.validation.constraints.NotNull; + +import java.util.ArrayList; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@RestController +@Api(tags = "api/analyse/cpg") +@RequestMapping("api/analyse/cpg") +public class CpgController { + + @Data + static class CpgRequest { + @NotNull + private UUID sequence; + private Set<String> tags; + + private Integer minWindowSize; + private Double minGcPercentage; + private Double minObservedToExpectedCpG; + private Integer minIslandMergeGap; + private Character firstNucleotide; + private Character secondNucleotide; + } + + @Data + static class HeatMapRequest { + private Integer sequenceLength; + private UUID sequenceId; + } + + @Autowired + private ExporterService exporterService; + + @Autowired + private SequenceService sequenceService; + + @Autowired + private JwtTokenService jwtTokenService; + + @Autowired + private CpgService cpgService; + + @PostMapping() + @ResponseStatus(HttpStatus.OK) + @PreAuthorize("isAuthenticated()") + @ApiOperation("Analyse sequence for CpG Islands") + public Response<Cpg> analyzeCpg(@ApiIgnore Authentication authentication, @RequestBody @Valid CpgController.CpgRequest request) { + Cpg cpg = cpgService.executeCpgAnalysis( + Cpg.builder() + 
.sequence(sequenceService.findOne(request.getSequence())) + .owner(JwtAuthenticationUtils.toUser(authentication)) + .tags(request.getTags()) + .minWindowSize(request.getMinWindowSize()) + .minGcPercentage(request.getMinGcPercentage()) + .minObservedToExpectedCpG(request.getMinObservedToExpectedCpG()) + .minIslandMergeGap(request.getMinIslandMergeGap()) + .firstNucleotide(request.getFirstNucleotide()) + .secondNucleotide(request.getSecondNucleotide()) + .build()); + return Response.response(cpg); + } + + @GetMapping("/{id}/analysis") + @ResponseStatus(HttpStatus.OK) + @PreAuthorize("isAuthenticated()") + @ApiOperation("Get one CpG analysis by ID.") + public Response<Cpg> getCpgAnalysis(@PathVariable UUID id) { + Cpg cpg = cpgService.findOne(id); + Response<Cpg> response = Response.response(cpg); + response.setDownloadToken(jwtTokenService.generateDownloadToken()); + return response; + } + + @GetMapping("/{id}/cpg") + @ResponseStatus(HttpStatus.OK) + @PreAuthorize("isAuthenticated()") + @ApiOperation("Get CpG islands analysis by ID.") + public ResponsePage<CpgAnalyseResult> getCpgList(@PathVariable UUID id) { + Page<CpgAnalyseResult> cpgPage = cpgService.returnCpgAnalysis(id); + return ResponsePage.responsePage(cpgPage); + } + + @GetMapping("/{id}/average/length") + @ResponseStatus(HttpStatus.OK) + @PreAuthorize("isAuthenticated()") + @ApiOperation("Get average CPG length for single analysis") + public Integer getAvgCpGLen(@PathVariable UUID id) + { + return cpgService.avgCpgLen(id); + } + + @GetMapping + @ResponseStatus(HttpStatus.OK) + @PreAuthorize("isAuthenticated()") + @ApiPaginationParams + @ApiOperation("Get page with CpG analysis.") + public ResponsePage<Cpg> getPageWithCpgAnalysis(PaginationParam paginationParam, SortParam sortParam, TagFilterParam tagFilterParam) { + Pageable pageable = paginationParam.toPageable(sortParam.toSort()); + Page<Cpg> cpgPage = cpgService.findAll(pageable, tagFilterParam.getTags()); + return ResponsePage.responsePage(cpgPage); + 
} + + @GetMapping("/tag") + @ResponseStatus(HttpStatus.OK) + @PreAuthorize("isAuthenticated()") + @ApiPaginationParams + @ApiOperation("Get all tags defined for sequences") + public ResponsePage<String> getPageWithTags(PaginationParam paginationParam, SortParam sortParam) { + Pageable pageable = paginationParam.toPageable(sortParam.toSort()); + Page<String> tagPage = cpgService.findAllTags(pageable); + return ResponsePage.responsePage(tagPage); + } + + @PutMapping("/{id}/tags") + @ResponseStatus(HttpStatus.ACCEPTED) + @PreAuthorize("isAuthenticated()") + @ApiOperation("Modify tags") + public Response<Cpg> modifyTags(@PathVariable UUID id, @RequestBody TagModifyRequest request) { + Cpg cpg = cpgService.modifyTags(id, request.getTags()); + return Response.response(cpg); + } + + @DeleteMapping("/{id}") + @ResponseStatus(HttpStatus.NO_CONTENT) + @PreAuthorize("isAuthenticated()") + @ApiOperation("Delete one CpG analysis by ID") + public void deleteCpgAnalysis(@PathVariable UUID id) { + cpgService.delete(id); + } + + + @GetMapping(path = "/{id}/cpg.csv", produces = MediaType.TEXT_PLAIN_VALUE) + @ResponseStatus(HttpStatus.OK) + @ApiOperation("Get CSV file.") + public ResponseEntity<String> getCsv(@PathVariable UUID id) { + String csv = exporterService.exportData(id, Analysis.CPG, Output.CSV); + HttpHeaders headers = new HttpHeaders(); + headers.setContentDisposition(ContentDisposition.builder("attachment").filename(id + ".csv").build()); + headers.setContentType(MediaType.valueOf("text/csv")); + headers.setContentLength(csv.length()); + return new ResponseEntity<>(csv, headers, HttpStatus.OK); + } + + @GetMapping(path = "/{id}/cpg.bedgraph", produces = MediaType.TEXT_PLAIN_VALUE) + @ResponseStatus(HttpStatus.OK) + @ApiOperation("Get BEDGRAPH file.") + public ResponseEntity<String> getBedgraph(@PathVariable UUID id) { + String bedgraph = exporterService.exportData(id, Analysis.CPG, Output.BEDGRAPH); + HttpHeaders headers = new HttpHeaders(); + 
headers.setContentDisposition(ContentDisposition.builder("attachment").filename(id + ".bedgraph").build()); + headers.setContentType(MediaType.valueOf("text/bedgraph")); + headers.setContentLength(bedgraph.length()); + return new ResponseEntity<>(bedgraph, headers, HttpStatus.OK); + } + + @GetMapping("/{id}/heatmap") + @ResponseStatus(HttpStatus.OK) + @PreAuthorize("isAuthenticated()") + @ApiOperation("Get CpG heatmap by Id.") + public Heatmap getCpgHeatmap( + @PathVariable UUID id, + Integer segments, + Integer from, + Integer to) + { + return cpgService.getHeatmap(id, segments, from, to); + } +} diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgDataSessionService.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgDataSessionService.java new file mode 100644 index 0000000000000000000000000000000000000000..02eeaa2e46c5df09e0ecf93d0219ff5caa55225c --- /dev/null +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgDataSessionService.java @@ -0,0 +1,27 @@ +package cz.mendelu.dnaAnalyser.analyse.cpg; + +import cz.mendelu.dnaAnalyser.overlay.LocalisedOverlay; +import cz.mendelu.dnaAnalyser.utils.analyse.service.AbstractDataSessionService; +import cz.mendelu.dnaAnalyser.utils.analyse.service.heatmap.HeatmapService; +import org.apache.ibatis.session.Configuration; +import org.springframework.stereotype.Service; + +import java.sql.SQLException; +import java.sql.Statement; + +@Service +public class CpgDataSessionService extends AbstractDataSessionService implements HeatmapService, LocalisedOverlay { + + @Override + protected void onCreateTables(Statement statement) throws SQLException { + // Predpokladá sa, Ĺľe CpgMapper obsahuje SQL prĂkaz pre vytvorenie tabuÄľky pre CpG analĂ˝zy + statement.execute(CpgMapper.TABLE); + } + + @Override + protected void onConfiguration(Configuration configuration) { + // Registrácia aliasu a mappera pre CpG analĂ˝zu + configuration.getTypeAliasRegistry().registerAlias(Cpg.class); + 
configuration.addMapper(CpgMapper.class); + } +} diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgMapper.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgMapper.java new file mode 100644 index 0000000000000000000000000000000000000000..66cca0124e2499e582a15c9ac3cd7fc5db960b3c --- /dev/null +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgMapper.java @@ -0,0 +1,111 @@ +package cz.mendelu.dnaAnalyser.analyse.cpg; + +import cz.mendelu.dnaAnalyser.analyse.zdna.ZdnaAnalyseResult; +import cz.mendelu.dnaAnalyser.utils.analyse.data.DataLocalisedMapper; +import org.apache.ibatis.annotations.*; +import org.apache.ibatis.cursor.Cursor; +import org.apache.ibatis.session.RowBounds; + +import java.util.ArrayList; +import java.util.List; + +public interface CpgMapper extends DataLocalisedMapper<CpgAnalyseResult> { + + // language=SQL + String TABLE = "CREATE TABLE IF NOT EXISTS CPG (" + + "ID integer NOT NULL AUTO_INCREMENT, " + + "position integer, " + + "end integer, " + + "sequence varchar, " + + "gcPerc float, " + + "observedToExpectedCpG float, " + + "length integer, " + + "PRIMARY KEY (ID))"; + + @Select("SELECT count(*) FROM CPG") + int countAll(); + + @Select("SELECT * FROM CPG") + @Results(value = { + @Result(property = "id", column = "ID"), + @Result(property = "position", column = "position"), + @Result(property = "end", column = "end"), + @Result(property = "sequence", column = "sequence"), + @Result(property = "gcPerc", column = "gcPerc"), + @Result(property = "observedToExpectedCpG", column = "observedToExpectedCpG"), + @Result(property = "length", column = "length") + }) + ArrayList<CpgAnalyseResult> getAll(); + + @Select("SELECT * FROM CPG WHERE position > #{position} AND end < #{end}") + @Results(value = { + @Result(property = "id", column = "ID"), + @Result(property = "position", column = "position"), + @Result(property = "end", column = "end"), + @Result(property = "sequence", column = "sequence"), + @Result(property = 
"gcPerc", column = "gcPerc"), + @Result(property = "observedToExpectedCpG", column = "observedToExpectedCpG"), + @Result(property = "length", column = "length") + }) + ArrayList<CpgAnalyseResult> getByWindow(Integer position, Integer end); + + @Select("SELECT * FROM CPG") + @Results(value = { + @Result(property = "id", column = "ID"), + @Result(property = "position", column = "position"), + @Result(property = "end", column = "end"), + @Result(property = "sequence", column = "sequence"), + @Result(property = "gcPerc", column = "gcPerc"), + @Result(property = "observedToExpectedCpG", column = "observedToExpectedCpG"), + @Result(property = "length", column = "length") + }) + Cursor<CpgAnalyseResult> getCursor(); + + @Select("SELECT * FROM CPG WHERE ID = #{id}") + @Results(value = { + @Result(property = "id", column = "ID"), + @Result(property = "position", column = "position"), + @Result(property = "end", column = "end"), + @Result(property = "sequence", column = "sequence"), + @Result(property = "gcPerc", column = "gcPerc"), + @Result(property = "observedToExpectedCpG", column = "observedToExpectedCpG"), + @Result(property = "length", column = "length") + }) + CpgAnalyseResult getById(int id); + + @Update("UPDATE CPG SET " + + "position = #{position}, " + + "end = #{end}, " + + "sequence = #{sequence}, " + + "gcPerc = #{gcPerc}, " + + "observedToExpectedCpG = #{observedToExpectedCpG} " + + "length = #{length} " + + "WHERE ID = #{id}") + void update(CpgAnalyseResult result); + + @Select("SELECT * FROM CPG WHERE POSITION >= #{startPosition} AND POSITION < #{endPosition} ORDER BY ${orderName} ASC") + @Results(value = { + @Result(property = "id", column = "ID"), + @Result(property = "position", column = "position"), + @Result(property = "end", column = "end"), + @Result(property = "sequence", column = "sequence"), + @Result(property = "gcPerc", column = "gcPerc"), + @Result(property = "observedToExpectedCpG", column = "observedToExpectedCpG"), + @Result(property = 
"length", column = "length") + }) + List<CpgAnalyseResult> getAllBetweenPositionOrderAsc( + @Param("startPosition") int startPosition, + @Param("endPosition") int endPosition, + @Param("orderName") String orderName, + RowBounds rowBounds); + + @Delete("DELETE from CPG WHERE ID = #{id}") + void delete(int id); + + @Insert("INSERT INTO CPG (position, end, sequence, gcPerc, observedToExpectedCpG, length)" + + " VALUES " + + "(#{position}, #{end}, #{sequence}, #{gcPerc}, #{observedToExpectedCpG}, #{length})" + ) + @Options(useGeneratedKeys = true, keyProperty = "id") + void insert(CpgAnalyseResult result); +} diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgRepository.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgRepository.java new file mode 100644 index 0000000000000000000000000000000000000000..39d7bdcfea00ab067b9a5aff7d971d55dd229044 --- /dev/null +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgRepository.java @@ -0,0 +1,27 @@ +package cz.mendelu.dnaAnalyser.analyse.cpg; + + +import cz.mendelu.dnaAnalyser.utils.repository.BaseRepository; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +import java.util.Optional; +import java.util.UUID; + +public interface CpgRepository extends BaseRepository<Cpg, UUID> { + + @Query("select a from Cpg as a join a.owner o where o.id = ?#{principal.id}") + Page<Cpg> findAll(Pageable pageable); + + @Query("select a from Cpg as a join a.owner o join a.tags t where t = :tag and o.id = ?#{principal.id}") + Page<Cpg> findAll(Pageable pageable, @Param("tag") String tag); + + @Query("select a from Cpg as a join a.owner o where a.id = :id and o.id = ?#{principal.id}") + Optional<Cpg> findById(@Param("id") UUID id); + + @Query("select distinct t from Cpg as s join s.owner o join s.tags t where o.id = ?#{principal.id}") + Page<String> 
findAllTags(Pageable pageable); + +} diff --git a/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgService.java b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgService.java new file mode 100644 index 0000000000000000000000000000000000000000..895d24c8e6215b18440a6feff5b1c1d7b38fb2fd --- /dev/null +++ b/src/main/java/cz/mendelu/dnaAnalyser/analyse/cpg/CpgService.java @@ -0,0 +1,138 @@ +package cz.mendelu.dnaAnalyser.analyse.cpg; + +import cz.mendelu.dnaAnalyser.analyse.zdna.Zdna; +import cz.mendelu.dnaAnalyser.analyse.zdna.ZdnaAnalyseResult; +import cz.mendelu.dnaAnalyser.analyse.zdna.ZdnaMapper; +import cz.mendelu.dnaAnalyser.batch.BatchService; +import cz.mendelu.dnaAnalyser.sequence.Sequence; +import cz.mendelu.dnaAnalyser.sequence.data.SequenceData; +import cz.mendelu.dnaAnalyser.sequence.data.SequenceDataRepository; +import cz.mendelu.dnaAnalyser.sequence.stream.BufferedWindow; +import cz.mendelu.dnaAnalyser.sequence.stream.Window; +import cz.mendelu.dnaAnalyser.utils.analyse.service.heatmap.Heatmap; +import cz.mendelu.dnaAnalyser.utils.exception.NotFoundException; +import cz.mendelu.dnaAnalyser.utils.repository.BaseRepository; +import cz.mendelu.dnaAnalyser.utils.service.ModelService; +import lombok.extern.slf4j.Slf4j; +import org.apache.ibatis.session.RowBounds; +import org.apache.ibatis.session.SqlSession; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.Pageable; +import org.springframework.data.domain.PageImpl; +import org.springframework.data.domain.Sort; + +import java.util.*; + +@Service +@Slf4j +public class CpgService extends ModelService<Cpg> { + + @Autowired + private CpgRepository cpgRepository; + + @Autowired + private SequenceDataRepository sequenceDataRepository; + + @Autowired + private BatchService batchService; + + @Autowired + private CpgAnalyser cpgAnalyser; + + @Autowired + private 
CpgDataSessionService cpgDataSessionService; + + @Override + protected BaseRepository<Cpg, UUID> getRepository() { + return cpgRepository; + } + + public Cpg executeCpgAnalysis(Cpg cpg) { + + cpg = cpgRepository.save(cpg); + batchService.createBatch(cpg.getOwner(), cpg) + .execute(uuid -> { + Cpg analysis = cpgRepository + .findById(uuid) + .orElseThrow(() -> new NotFoundException(Cpg.class, uuid)); + Sequence sequence = analysis.getSequence(); + SequenceData sequenceData = sequenceDataRepository.load(sequence); + + int minWindowSize = analysis.getMinWindowSize(); + double minGcPercentage = analysis.getMinGcPercentage(); + double minObservedToExpectedCpG = analysis.getMinObservedToExpectedCpG(); + int minIslandMergeGap = analysis.getMinIslandMergeGap(); + char firstNucleotide = analysis.getFirstNucleotide(); + char secondNucleotide = analysis.getSecondNucleotide(); + + if (sequence.getLength() < minWindowSize) { + throw new IllegalArgumentException(String.format("The window (%d) must be smaller than the sequence length (%d).", minWindowSize, sequence.getLength())); + } + + try (SqlSession sqlSession = cpgDataSessionService.openDataSession(analysis)) { + CpgMapper cpgMapper = sqlSession.getMapper(CpgMapper.class); + + ArrayList<CpgAnalyseResult> results = cpgAnalyser.getResults(sequenceData.toPlain(), minWindowSize, minGcPercentage, minObservedToExpectedCpG, minIslandMergeGap, firstNucleotide, secondNucleotide); + + results.forEach(cpgMapper::insert); + + analysis.setResultCount((long) results.size()); + analysis.finishNow(); + sqlSession.commit(); + } + cpgRepository.save(analysis); + }).start(); + return cpg; + } + + public Page<CpgAnalyseResult> returnCpgAnalysis(UUID id) { + Cpg analysis = findOne(id); + try (SqlSession sqlSession = cpgDataSessionService.openDataSession(analysis)) { + CpgMapper cpgMapper = sqlSession.getMapper(CpgMapper.class); + Page<CpgAnalyseResult> cpgPage = new PageImpl<>(cpgMapper.getAll()); + return cpgPage; + } + } + + public Heatmap 
/**
 * A half-open window {@code [windowBegin, windowEnd)} over a nucleotide record together
 * with the statistics used to decide whether the window is a CpG island: the fraction of
 * the two configured nucleotides and the observed/expected ratio of the configured
 * dinucleotide.
 *
 * <p>The window can be moved one base at a time ({@link #expandRight()},
 * {@link #shrinkLeft()}, {@link #shrinkRight()}); these operations keep the counts up to
 * date incrementally and agree with a full recount ({@link #updateGcCount()}).
 * {@code windowSize} is a separate field that callers maintain themselves when they move
 * single borders. Instances are not thread-safe.
 */
public class CpgWindow {

    public final String record;
    private final int recordLen;
    public int windowBegin;
    public int windowSize;
    public int windowEnd;
    /** Occurrences of the first nucleotide plus occurrences of the second nucleotide in the window. */
    private int gcCount;
    /** Occurrences of the dinucleotide (first immediately followed by second) inside the window. */
    private double obsCpg;
    private double obsExp;
    private double gcPerc;
    private final double minGcPercentage;
    private final double minObservedToExpectedCpg;
    private final int minWindowSize;
    private final char firstNucleotide;
    private final char secondNucleotide;

    /**
     * Creates the window {@code [windowBegin, windowBegin + windowSize)} and counts its content.
     *
     * @param sequence                 complete record; must not be {@code null}
     * @param windowBegin              inclusive start index of the window
     * @param windowSize               number of bases in the window
     * @param minGcPercentage          {@link #isIsland()} requires a GC fraction above this value
     * @param minObservedToExpectedCpg {@link #isIsland()} requires an observed/expected ratio above this value
     * @param minWindowSize            step length used by {@link #expandRightIsland()} and {@link #rollbackUntilIsland()}
     * @param firstNucleotide          first nucleotide of the dinucleotide
     * @param secondNucleotide         second nucleotide of the dinucleotide
     * @throws StringIndexOutOfBoundsException when the window does not lie inside the record
     */
    public CpgWindow(String sequence,
                     int windowBegin,
                     int windowSize,
                     double minGcPercentage,
                     double minObservedToExpectedCpg,
                     int minWindowSize,
                     char firstNucleotide,
                     char secondNucleotide) {
        this.record = sequence;
        this.recordLen = sequence.length();
        this.windowBegin = windowBegin;
        this.windowSize = windowSize;
        this.windowEnd = windowBegin + windowSize;
        this.minGcPercentage = minGcPercentage;
        this.minObservedToExpectedCpg = minObservedToExpectedCpg;
        this.minWindowSize = minWindowSize;
        this.firstNucleotide = firstNucleotide;
        this.secondNucleotide = secondNucleotide;
        // Private recount instead of the public updateGcCount(): a constructor must not
        // call a method that a subclass could override.
        this.recount();
    }

    /** Returns the observed/expected ratio computed by the last evaluation. */
    public double getObsExp() {
        return this.obsExp;
    }

    /** Returns the GC fraction computed by the last evaluation. */
    public double getGcPerc() {
        return this.gcPerc;
    }

    @Override
    public String toString() {
        return "Window at (" + this.windowBegin + ", " + this.windowEnd + ")\tgcper:" + this.gcPerc + "\tobs_exp:" + this.obsExp + "\twin_length:" + this.windowSize;
    }

    /**
     * Recomputes {@code gcPerc} and {@code obsExp} from the current counts and
     * {@code windowSize}. Both are 0.0 when they would otherwise be undefined (empty
     * window, or no occurrence of either nucleotide); floating-point division does not
     * throw, so these cases have to be tested explicitly.
     */
    private void evaluate() {
        if (this.windowSize <= 0) {
            this.gcPerc = 0.0;
            this.obsExp = 0.0;
            return;
        }
        this.gcPerc = (double) this.gcCount / this.windowSize;
        double expCpg = Math.pow(this.gcCount / 2.0, 2) / this.windowSize;
        this.obsExp = expCpg > 0.0 ? this.obsCpg / expCpg : 0.0;
    }

    /**
     * Re-evaluates the window and reports whether both the GC fraction and the
     * observed/expected ratio are strictly above their thresholds.
     */
    public boolean isIsland() {
        this.evaluate();
        return this.gcPerc > this.minGcPercentage && this.obsExp > this.minObservedToExpectedCpg;
    }

    /**
     * Contribution of the base at {@code index} to {@code gcCount}: one for matching the
     * first nucleotide plus one for matching the second, so that a configuration with two
     * identical nucleotides is counted the same way incrementally and in a full recount.
     */
    private int weightAt(int index) {
        char base = this.record.charAt(index);
        int weight = 0;
        if (base == this.firstNucleotide) {
            weight++;
        }
        if (base == this.secondNucleotide) {
            weight++;
        }
        return weight;
    }

    /** Tells whether the dinucleotide starts at {@code index} and lies completely inside {@code [begin, end)}. */
    private boolean pairInside(int index, int begin, int end) {
        return index >= begin
                && index + 1 < end
                && this.record.charAt(index) == this.firstNucleotide
                && this.record.charAt(index + 1) == this.secondNucleotide;
    }

    /** Counts the current window from scratch in a single pass and evaluates it. */
    private void recount() {
        if (this.windowBegin < 0 || this.windowEnd > this.recordLen || this.windowBegin > this.windowEnd) {
            throw new StringIndexOutOfBoundsException("Window [" + this.windowBegin + ", " + this.windowEnd
                    + ") is outside of a record of length " + this.recordLen);
        }
        int nucleotides = 0;
        int pairs = 0;
        for (int i = this.windowBegin; i < this.windowEnd; i++) {
            nucleotides += weightAt(i);
            if (pairInside(i, this.windowBegin, this.windowEnd)) {
                pairs++;
            }
        }
        this.gcCount = nucleotides;
        this.obsCpg = pairs;
        this.evaluate();
    }

    /**
     * Recounts the nucleotides and dinucleotides of {@code [windowBegin, windowEnd)} and
     * re-evaluates the window.
     *
     * @throws StringIndexOutOfBoundsException when the window does not lie inside the record
     */
    public void updateGcCount() {
        this.recount();
    }

    /** Drops the first base of the window. {@code windowSize} is left untouched. */
    public void shrinkLeft() {
        this.gcCount -= weightAt(this.windowBegin);
        if (pairInside(this.windowBegin, this.windowBegin, this.windowEnd)) {
            this.obsCpg--;
        }
        this.windowBegin++;
    }

    /**
     * Appends the base following the window. {@code windowSize} is left untouched.
     *
     * @return {@code false} when the window already ends at the end of the record
     */
    public boolean expandRight() {
        if (this.windowEnd >= this.recordLen) {
            return false;
        }
        this.gcCount += weightAt(this.windowEnd);
        if (pairInside(this.windowEnd - 1, this.windowBegin, this.windowEnd + 1)) {
            this.obsCpg++;
        }
        this.windowEnd++;
        return true;
    }

    /** Drops the last base of the window. {@code windowSize} is left untouched. */
    public void shrinkRight() {
        this.gcCount -= weightAt(this.windowEnd - 1);
        if (pairInside(this.windowEnd - 2, this.windowBegin, this.windowEnd)) {
            this.obsCpg--;
        }
        this.windowEnd--;
    }

    /**
     * Prepends the base preceding the window. The dinucleotide gained is the one that
     * starts at the new first base, i.e. at {@code windowBegin - 1}; checking the pair one
     * position further left (as was done before) counts a pair whose first base is still
     * outside of the window.
     */
    private void expandLeft() {
        this.gcCount += weightAt(this.windowBegin - 1);
        if (pairInside(this.windowBegin - 1, this.windowBegin - 1, this.windowEnd)) {
            this.obsCpg++;
        }
        this.windowBegin--;
    }

    /**
     * Extends the window by one step of {@code minWindowSize} bases; when fewer bases
     * remain, the window is extended to the end of the record. The borders are moved even
     * when the added step is not an island; {@link #rollbackUntilIsland()} corrects that.
     * The counts are not refreshed here.
     *
     * @return {@code true} when the added step is itself an island, {@code false} when it
     *         is not or when the window already ends at the end of the record
     */
    public boolean expandRightIsland() {
        int remaining = this.recordLen - this.windowEnd;
        if (remaining == 0) {
            return false;
        }
        int stepBegin = remaining < this.minWindowSize ? this.recordLen - this.minWindowSize : this.windowEnd;
        CpgWindow step = new CpgWindow(this.record, stepBegin, this.minWindowSize, this.minGcPercentage,
                this.minObservedToExpectedCpg, this.minWindowSize, this.firstNucleotide, this.secondNucleotide);
        this.windowEnd = stepBegin + this.minWindowSize;
        this.windowSize = this.windowEnd - this.windowBegin;
        return step.isIsland();
    }

    /**
     * Moves a window of {@code minWindowSize} bases, initially placed at the end of this
     * window, to the left one base at a time until it is an island, and then ends this
     * window where that window ends. The search never moves past the start of this window.
     */
    public void rollbackUntilIsland() {
        CpgWindow lastWindow = new CpgWindow(this.record, this.windowEnd - this.minWindowSize, this.minWindowSize,
                this.minGcPercentage, this.minObservedToExpectedCpg, this.minWindowSize, this.firstNucleotide, this.secondNucleotide);
        while (!lastWindow.isIsland() && lastWindow.windowBegin > this.windowBegin) {
            lastWindow.expandLeft();
            lastWindow.shrinkRight();
        }
        this.joinWindows(lastWindow);
    }

    /** Extends (or cuts) this window so that it ends where {@code lastWindow} ends, then recounts it. */
    public void joinWindows(CpgWindow lastWindow) {
        this.windowEnd = lastWindow.windowEnd;
        this.windowSize = this.windowEnd - this.windowBegin;
        this.updateGcCount();
    }

    /**
     * Returns the character at {@code index}; a negative index counts from the end of the
     * string ({@code -1} is the last character).
     *
     * @throws NullPointerException            when {@code str} is {@code null}
     * @throws StringIndexOutOfBoundsException when the index is out of range after adjustment
     */
    public static char charAt(String str, int index) {
        if (str == null) {
            throw new NullPointerException("String is null");
        }

        // A negative index addresses the string from its end.
        if (index < 0) {
            index += str.length();
        }

        if (index < 0 || index >= str.length()) {
            throw new StringIndexOutOfBoundsException("String index out of range: " + index);
        }

        return str.charAt(index);
    }
}
-- Parameters of one CpG island analysis; each row extends the analyses row with the same id.
CREATE TABLE cpg_analyses (
    id                           UUID PRIMARY KEY REFERENCES analyses (id),
    min_window_size              INTEGER,
    min_gc_percentage            DOUBLE PRECISION,
    min_observed_to_expected_cpg DOUBLE PRECISION,
    min_island_merge_gap         INTEGER,
    first_nucleotide             CHAR(1),
    second_nucleotide            CHAR(1)
);