oard-kg
Open Annotations for Rare Diseases (OARD) Knowledge Graph
Clinical associations between rare diseases and phenotypes derived from electronic health records
48.3M
triples
0
classes
15
properties
10.1M
subjects
OARD-KG is an RDF triplestore which provides a SPARQL query endpoint for clinical associations between rare diseases and phenotypes derived from electronic health records.
Get diseases most strongly associated with a phenotype (Increased total monocyte count)
# Rank the diseases most strongly associated with a single phenotype.
# Each row reports the disease, its label, the supporting data set, the sample size,
# the log odds ratio and the two bounds of its 95% confidence interval (CI).
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX hp: <http://purl.obolibrary.org/obo/HP_>
SELECT ?disease ?label ?data_set ?pair_count ?log_odds ?lower_ci ?upper_ci
WHERE {
VALUES ?phenotype {hp:0012311} # Increased total monocyte count
# Find associated diseases
?assoc biolink:subject ?disease ;
biolink:object ?phenotype .
?disease biolink:category biolink:Disease ;
rdfs:label ?label .
# Get study
?assoc biolink:has_supporting_studies ?study .
# Get Data Set (currently buried in Count Result. TODO: move to Study node)
?study biolink:has_study_results ?results_counts .
?results_counts biolink:category biolink:ConceptCountAnalysisResult ;
biolink:supporting_data_set ?data_set .
# Get Log Odds Analysis Result
?study biolink:has_study_results ?results_log_odds .
?results_log_odds biolink:category biolink:LogOddsAnalysisResult ;
biolink:log_odds_ratio ?log_odds ;
biolink:total_sample_size ?pair_count .
# Log odds confidence interval is represented as two unordered values.
# Both are bound from the same property; the filter keeps only the ordered pair,
# so ?lower_ci is the smaller bound and ?upper_ci the larger one.
?results_log_odds biolink:log_odds_ratio_95_ci ?lower_ci , ?upper_ci .
FILTER(?lower_ci < ?upper_ci)
# Rank by the CI bound closest to 0 as an indication of association strength with high confidence.
# A CI that contains 0 is compatible with "no association", so its distance is 0;
# without this case a wide interval such as [-4, 5] would outrank a tight one such as [2, 3].
BIND(IF(?lower_ci <= 0 && ?upper_ci >= 0, 0, IF(ABS(?lower_ci) < ABS(?upper_ci), ABS(?lower_ci), ABS(?upper_ci))) AS ?dist_to_zero)
}
ORDER BY DESC(?dist_to_zero)
LIMIT 100
graph TD
%% Legend: green nodes are projected variables, yellow nodes are IRIs.
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
v5("?assoc")
v10("?data_set"):::projected
v6("?disease"):::projected
v14("?dist_to_zero")
v7("?label"):::projected
v12("?log_odds"):::projected
v2("?lower_ci"):::projected
v13("?pair_count"):::projected
v4("?phenotype")
v9("?results_counts")
v11("?results_log_odds")
v8("?study")
v3("?upper_ci"):::projected
c8([biolink:ConceptCountAnalysisResult]):::iri
c4([biolink:Disease]):::iri
c10([biolink:LogOddsAnalysisResult]):::iri
f0[["?lower_ci < ?upper_ci"]]
f0 --> v2
f0 --> v3
bind1[/VALUES ?phenotype/]
bind1-->v4
bind10([obo:HP_0012311])
bind10 --> bind1
v5 --"biolink:subject"--> v6
v5 --"biolink:object"--> v4
v6 --"biolink:category"--> c4
v6 --"rdfs:label"--> v7
v5 --"biolink:has_supporting_studies"--> v8
v8 --"biolink:has_study_results"--> v9
v9 --"biolink:category"--> c8
v9 --"biolink:supporting_data_set"--> v10
v8 --"biolink:has_study_results"--> v11
v11 --"biolink:category"--> c10
v11 --"biolink:log_odds_ratio"--> v12
v11 --"biolink:total_sample_size"--> v13
v11 --"biolink:log_odds_ratio_95_ci"--> v2
v11 --"biolink:log_odds_ratio_95_ci"--> v3
bind2[/"if(?lower_ci <= 0 && ?upper_ci >= 0,0,if(numeric-abs(?lower_ci) < numeric-abs(?upper_ci),numeric-abs(?lower_ci),numeric-abs(?upper_ci)))"/]
v2 --o bind2
v3 --o bind2
bind2 --as--o v14
Get OARD disease-phenotype associations that are confirmed by an Ubergraph (disease)-[disease has feature]->(phenotypic feature) edge
# List the phenotypes OARD associates with one disease, keeping an association only when
# the same disease/phenotype pair is also linked directly by the confirming edge.
# Each row carries the OARD predicate, the supporting data set, the log odds ratio
# and the two bounds of its 95% confidence interval.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mondo: <http://purl.obolibrary.org/obo/MONDO_>
PREFIX ro: <http://purl.obolibrary.org/obo/RO_>
SELECT DISTINCT ?phenotype ?phenotype_label ?oard_predicate ?data_set ?log_odds ?ci1 ?ci2 ?confirming_edge
WHERE {
# Input: the disease to look up
VALUES ?disease {mondo:0015691}
VALUES ?confirming_edge {ro:0004029} # Define the confirming relationship
# Find OARD associated phenotypes
?assoc biolink:subject ?disease ;
biolink:predicate ?oard_predicate ;
biolink:object ?phenotype .
?phenotype biolink:category biolink:PhenotypicFeature ;
rdfs:label ?phenotype_label .
# OARD and Ubergraph each have labels for the phenotype. This filter helps select 1 label
FILTER(lang(?phenotype_label) = "en")
# Confirm with Ubergraph relationship (condition for inclusion)
# The predicate position is the variable bound by VALUES above, so only that edge type matches.
?disease ?confirming_edge ?phenotype .
# Pull out OARD association data
?assoc biolink:has_supporting_studies ?study .
# Counts Results
?study biolink:has_study_results ?results_counts .
?results_counts biolink:category biolink:ConceptCountAnalysisResult ;
biolink:supporting_data_set ?data_set .
# Log-Odds Results
?study biolink:has_study_results ?results_log_odds .
?results_log_odds biolink:category biolink:LogOddsAnalysisResult ;
biolink:log_odds_ratio ?log_odds .
# Both CI values are bound from the same property; the filter keeps only the ordered pair,
# so ?ci1 is the smaller bound and ?ci2 the larger one.
?results_log_odds biolink:log_odds_ratio_95_ci ?ci1 , ?ci2 .
FILTER(?ci1 < ?ci2)
}
# Largest log odds ratio first, capped at 100 rows
ORDER BY DESC(?log_odds)
LIMIT 100
graph TD
%% Legend: green nodes are projected variables, yellow nodes are IRIs.
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
v7("?assoc")
v2("?ci1"):::projected
v3("?ci2"):::projected
v6("?confirming_edge"):::projected
v12("?data_set"):::projected
v5("?disease")
v1("?log_odds"):::projected
v8("?oard_predicate"):::projected
v9("?phenotype"):::projected
v4("?phenotype_label"):::projected
v11("?results_counts")
v13("?results_log_odds")
v10("?study")
c6([biolink:PhenotypicFeature]):::iri
c10([biolink:ConceptCountAnalysisResult]):::iri
c12([biolink:LogOddsAnalysisResult]):::iri
f0[["?ci1 < ?ci2"]]
f0 --> v2
f0 --> v3
%% The query filters on the label's language tag, not on the label value itself.
f1[["lang(?phenotype_label) = 'en'"]]
f1 --> v4
bind2[/VALUES ?disease/]
bind2-->v5
bind20([obo:MONDO_0015691])
bind20 --> bind2
bind3[/VALUES ?confirming_edge/]
bind3-->v6
bind30([obo:RO_0004029])
bind30 --> bind3
v7 --"biolink:subject"--> v5
v7 --"biolink:predicate"--> v8
v7 --"biolink:object"--> v9
v9 --"biolink:category"--> c6
v9 --"rdfs:label"--> v4
v5 -->v6--> v9
v7 --"biolink:has_supporting_studies"--> v10
v10 --"biolink:has_study_results"--> v11
v11 --"biolink:category"--> c10
v11 --"biolink:supporting_data_set"--> v12
v10 --"biolink:has_study_results"--> v13
v13 --"biolink:category"--> c12
v13 --"biolink:log_odds_ratio"--> v1
v13 --"biolink:log_odds_ratio_95_ci"--> v2
v13 --"biolink:log_odds_ratio_95_ci"--> v3
Get phenotypes most strongly associated with a disease (Marfan syndrome)
# Rank the phenotypes most strongly associated with a single disease.
# Each row reports the phenotype, its label, the supporting data set, the sample size,
# the log odds ratio and the two bounds of its 95% confidence interval (CI).
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mondo: <http://purl.obolibrary.org/obo/MONDO_>
SELECT ?phenotype ?phenotype_label ?data_set ?pair_count ?log_odds ?lower_ci ?upper_ci
WHERE {
VALUES ?disease {mondo:0007947} # Marfan syndrome
# Find associated phenotypes
?assoc biolink:subject ?disease ;
biolink:object ?phenotype .
?phenotype biolink:category biolink:PhenotypicFeature ;
rdfs:label ?phenotype_label .
# Get study
?assoc biolink:has_supporting_studies ?study .
# Get Data Set (currently buried in Count Result. TODO: move to Study node)
?study biolink:has_study_results ?results_counts .
?results_counts biolink:category biolink:ConceptCountAnalysisResult ;
biolink:supporting_data_set ?data_set .
# Get Log Odds Analysis Result
?study biolink:has_study_results ?results_log_odds .
?results_log_odds biolink:category biolink:LogOddsAnalysisResult ;
biolink:log_odds_ratio ?log_odds ;
biolink:total_sample_size ?pair_count .
# Log odds confidence interval is represented as two unordered values.
# Both are bound from the same property; the filter keeps only the ordered pair,
# so ?lower_ci is the smaller bound and ?upper_ci the larger one.
?results_log_odds biolink:log_odds_ratio_95_ci ?lower_ci , ?upper_ci .
FILTER(?lower_ci < ?upper_ci)
# Rank by the CI bound closest to 0 as an indication of association strength with high confidence.
# A CI that contains 0 is compatible with "no association", so its distance is 0;
# without this case a wide interval such as [-4, 5] would outrank a tight one such as [2, 3].
BIND(IF(?lower_ci <= 0 && ?upper_ci >= 0, 0, IF(ABS(?lower_ci) < ABS(?upper_ci), ABS(?lower_ci), ABS(?upper_ci))) AS ?dist_to_zero)
}
ORDER BY DESC(?dist_to_zero)
LIMIT 100
graph TD
%% Legend: green nodes are projected variables, yellow nodes are IRIs.
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
v5("?assoc")
v10("?data_set"):::projected
v4("?disease")
v14("?dist_to_zero")
v12("?log_odds"):::projected
v2("?lower_ci"):::projected
v13("?pair_count"):::projected
v6("?phenotype"):::projected
v7("?phenotype_label"):::projected
v9("?results_counts")
v11("?results_log_odds")
v8("?study")
v3("?upper_ci"):::projected
c4([biolink:PhenotypicFeature]):::iri
c8([biolink:ConceptCountAnalysisResult]):::iri
c10([biolink:LogOddsAnalysisResult]):::iri
f0[["?lower_ci < ?upper_ci"]]
f0 --> v2
f0 --> v3
bind1[/VALUES ?disease/]
bind1-->v4
bind10([obo:MONDO_0007947])
bind10 --> bind1
v5 --"biolink:subject"--> v4
v5 --"biolink:object"--> v6
v6 --"biolink:category"--> c4
v6 --"rdfs:label"--> v7
v5 --"biolink:has_supporting_studies"--> v8
v8 --"biolink:has_study_results"--> v9
v9 --"biolink:category"--> c8
v9 --"biolink:supporting_data_set"--> v10
v8 --"biolink:has_study_results"--> v11
v11 --"biolink:category"--> c10
v11 --"biolink:log_odds_ratio"--> v12
v11 --"biolink:total_sample_size"--> v13
v11 --"biolink:log_odds_ratio_95_ci"--> v2
v11 --"biolink:log_odds_ratio_95_ci"--> v3
bind2[/"if(?lower_ci <= 0 && ?upper_ci >= 0,0,if(numeric-abs(?lower_ci) < numeric-abs(?upper_ci),numeric-abs(?lower_ci),numeric-abs(?upper_ci)))"/]
v2 --o bind2
v3 --o bind2
bind2 --as--o v14
Get diseases most strongly associated with a list of phenotypes
# Score diseases against a list of phenotypes.
# The inner query keeps the highest log odds ratio per disease-phenotype pair; the outer
# query sums those per-pair scores for each disease and lists the phenotypes that matched.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX hp: <http://purl.obolibrary.org/obo/HP_>
SELECT ?disease ?disease_label
(SUM(?best_pair_score) AS ?total_score)
(GROUP_CONCAT(DISTINCT ?pheno_id; SEPARATOR=", ") AS ?matched_pheno_ids)
(GROUP_CONCAT(DISTINCT ?pheno_label; SEPARATOR="|") AS ?matched_pheno_labels)
WHERE {
{
# INNER QUERY: Find the strongest result per Disease-Phenotype pair
# Exactly one row per pair is required: the outer SUM adds every row it receives.
# A phenotype can carry more than one rdfs:label, so the label is collapsed with SAMPLE
# (an arbitrary one is kept) instead of being a grouping key, which would emit one row
# per label and count the same pair score several times.
SELECT ?disease ?phenotype (STR(?phenotype) AS ?pheno_id) (SAMPLE(?pheno_label_any) AS ?pheno_label) (MAX(?log_odds) AS ?best_pair_score)
WHERE {
# Input: Your list of Phenotypes
VALUES ?phenotype { hp:0012771 hp:0001166 hp:0001657 }
# Match Association path
?assoc biolink:object ?phenotype ;
biolink:subject ?disease .
?disease biolink:category biolink:Disease .
# Get Phenotype labels here to pass them up (left unbound when a phenotype has no label)
OPTIONAL {
?phenotype rdfs:label ?pheno_label_any .
}
# Navigate to stats
?assoc biolink:has_supporting_studies/biolink:has_study_results ?stats .
?stats biolink:category biolink:LogOddsAnalysisResult ;
biolink:log_odds_ratio ?log_odds .
}
GROUP BY ?disease ?phenotype
}
# Pull the disease label for the final output
OPTIONAL {
?disease rdfs:label ?disease_label .
}
}
# The label stays a grouping key on purpose: a disease with several labels then yields
# one row per label, each with a correct sum, rather than an inflated sum in a single row.
GROUP BY ?disease ?disease_label
ORDER BY DESC(?total_score)
LIMIT 100
graph TD
%% Legend: green nodes are projected variables, yellow nodes are IRIs.
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
v3("?assoc")
v9("?best_pair_score"):::projected
v4("?disease"):::projected
v10("?disease_label"):::projected
v7("?log_odds")
v11("?matched_pheno_ids")
v12("?matched_pheno_labels")
v8("?pheno_id"):::projected
v5("?pheno_label"):::projected
v13("?pheno_label_any")
v2("?phenotype")
v6("?stats")
%% ?total_score has its own node id so it no longer collides with ?matched_pheno_ids.
v14("?total_score")
a1((" "))
c4([biolink:Disease]):::iri
c8([biolink:LogOddsAnalysisResult]):::iri
bind0[/VALUES ?phenotype/]
bind0-->v2
bind00([obo:HP_0012771])
bind00 --> bind0
bind01([obo:HP_0001166])
bind01 --> bind0
bind02([obo:HP_0001657])
bind02 --> bind0
v3 --"biolink:object"--> v2
v3 --"biolink:subject"--> v4
v4 --"biolink:category"--> c4
subgraph optional0["(optional)"]
style optional0 fill:#bbf,stroke-dasharray: 5 5;
v2 -."rdfs:label".-> v13
end
v3 --"biolink:has_supporting_studies"--> a1
a1 --"biolink:has_study_results"--> v6
v6 --"biolink:category"--> c8
v6 --"biolink:log_odds_ratio"--> v7
bind2[/"str(?phenotype)"/]
v2 --o bind2
bind2 --as--o v8
bind4[/"sample(?pheno_label_any)"/]
v13 --o bind4
bind4 --as--o v5
bind3[/"max(?log_odds)"/]
v7 --o bind3
bind3 --as--o v9
subgraph optional1["(optional)"]
style optional1 fill:#bbf,stroke-dasharray: 5 5;
v4 -."rdfs:label".-> v10
end
bind7[/"sum(?best_pair_score)"/]
v9 --o bind7
bind7 --as--o v14
bind8[/"?pheno_id"/]
v8 --o bind8
bind8 --as--o v11
bind9[/"?pheno_label"/]
v5 --o bind9
bind9 --as--o v12
| SPARQL Endpoint | https://frink.apps.renci.org/oard-kg/sparql |
|---|---|
| Triple Pattern Fragments | https://frink.apps.renci.org/ldf/oard-kg |
| Property | Triples |
|---|---|