% Journal article: Blohm et al. (2025), Methods of Information in Medicine.
% Layout: one field per line, lowercase field names, aligned "=" signs.
% The month uses the standard unquoted macro (dec) so the bibliography style
% decides how it is rendered.
% "German" in the title is brace-protected so sentence-casing styles keep the
% capital letter.
% NOTE(review): the author list contains a UTF-8 character ("ä"). This is fine
% with biblatex/Biber; a classic 8-bit BibTeX toolchain would need {\"a}
% instead -- confirm which toolchain consumes this file.
% NOTE(review): the "type" field is not a standard field for article entries
% and is presumably consumed by the exporting system -- kept as exported.
@article{blohm2025b,
  author    = {Blohm, Kolja and Korfkamp, David and Oesterling, Florian and Dählmann, Klaas and Schulze, Stefanie and Hein, Andreas},
  title     = {Clustering Breast Cancer Patients Based on Their Treatment Courses Using {German} Cancer Registry Data},
  journal   = {Methods of Information in Medicine},
  year      = {2025},
  month     = dec,
  publisher = {Georg Thieme Verlag KG},
  doi       = {10.1055/a-2753-9631},
  url       = {http://www.thieme-connect.com/products/ejournals/abstract/10.1055/a-2753-9631},
  type      = {article},
  abstract  = {Cancer registries collect extensive data on cancer patients, including diagnoses, treatments, and disease progression.
               These data offer valuable insights into cancer care, but it is challenging to analyze due to its complexity.
               Machine learning techniques, particularly clustering, enable the exploration of treatment data to uncover previously unknown patterns and relationships.
               This work aimed to develop a method for clustering breast cancer patients in cancer registries based on their treatment courses, to demonstrate the usefulness of clustering for gaining insights, improving data quality, and identifying clinically relevant patterns.
               We developed a similarity measure adapted from the Levenshtein distance to compare treatment courses, incorporating cancer diagnosis, surgeries, radiotherapies, and systemic therapies.
               The method was evaluated on 17,822 breast cancer cases diagnosed in 2019 from the cancer registry of North Rhine-Westphalia.
               Evaluation involved two stages: first, domain experts reviewed the clustering results to assess clinical relevance and interpretability.
               Second, an intercluster survival analysis was performed to identify clinically relevant differences between treatment patterns.
               Expert evaluations confirmed that clustering produced clinically plausible groups while also uncovering unexpected treatment patterns and potential data inconsistencies.
               The survival analysis showed differences in survival between clusters in both prognostically favorable and unfavorable subgroups.
               These results demonstrate that treatment-course clustering can identify patient groups with differing survival outcomes.
               However, registry data incompleteness and unmeasured confounders may influence these findings.
               Clustering treatment courses in cancer registries can reveal data quality issues, distinguish groups with different prognostic profiles, and support exploratory analyses of treatment patterns.
               While these findings are not intended to guide clinical decision making or evaluate treatment effectiveness, they can help generate hypotheses, identify unexpected care pathways, and support quality monitoring within cancer registries.
               Future work should focus on improving treatment data completeness, incorporating additional clinical variables, and refining clustering methods for broader applicability.},
}

@COMMENT{Bibtex file generated on }