oard-kg

Open Annotations for Rare Diseases (OARD) Knowledge Graph

Clinical associations between rare diseases and phenotypes derived from electronic health records

48.3M triples
0 classes
15 properties
10.1M subjects

OARD-KG is an RDF triplestore which provides a SPARQL query endpoint for clinical associations between rare diseases and phenotypes derived from electronic health records.

Get diseases most strongly associated with a phenotype (Increased total monocyte count)
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX hp: <http://purl.obolibrary.org/obo/HP_>

# Rank the diseases associated with one phenotype by a conservative measure of
# association strength (distance from 0 of the nearest 95% CI bound of the log odds).
SELECT ?disease ?label ?data_set ?pair_count ?log_odds ?lower_ci ?upper_ci
WHERE {
    VALUES ?phenotype {hp:0012311}  # Increased total monocyte count

    # Find associated diseases (the association's subject is the disease,
    # its object is the input phenotype)
    ?assoc biolink:subject ?disease ;
           biolink:object ?phenotype .
    ?disease biolink:category biolink:Disease ;
             rdfs:label ?label .

    # Get study
    ?assoc biolink:has_supporting_studies ?study .

    # Get Data Set (currently buried in Count Result. TODO: move to Study node)
    ?study biolink:has_study_results ?results_counts .
    ?results_counts biolink:category biolink:ConceptCountAnalysisResult ;
                    biolink:supporting_data_set ?data_set .

    # Get Log Odds Analysis Result
    ?study biolink:has_study_results ?results_log_odds .
    ?results_log_odds biolink:category biolink:LogOddsAnalysisResult ;
                      biolink:log_odds_ratio ?log_odds ;
                      biolink:total_sample_size ?pair_count .

    # Log odds confidence interval is represented as two unordered values;
    # the FILTER keeps only the binding where ?lower_ci is the smaller one.
    ?results_log_odds biolink:log_odds_ratio_95_ci ?lower_ci , ?upper_ci .
    FILTER(?lower_ci < ?upper_ci)

    # Association strength with high confidence = distance from 0 to the CI bound
    # closest to 0. An interval that contains 0 cannot be told apart from
    # "no association", so it scores 0 instead of the magnitude of one of its bounds.
    BIND(IF(?lower_ci > 0, ?lower_ci,
            IF(?upper_ci < 0, ABS(?upper_ci), 0)) AS ?dist_to_zero)
}
ORDER BY DESC(?dist_to_zero)
LIMIT 100
graph TD
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
  v5("?assoc")
  v10("?data_set"):::projected 
  v6("?disease"):::projected 
  v14("?dist_to_zero")
  v7("?label"):::projected 
  v12("?log_odds"):::projected 
  v2("?lower_ci"):::projected 
  v13("?pair_count"):::projected 
  v4("?phenotype")
  v9("?results_counts")
  v11("?results_log_odds")
  v8("?study")
  v3("?upper_ci"):::projected 
  c8([biolink:ConceptCountAnalysisResult]):::iri 
  c4([biolink:Disease]):::iri 
  c10([biolink:LogOddsAnalysisResult]):::iri 
  f0[["?lower_ci < ?upper_ci"]]
  f0 --> v2
  f0 --> v3
  bind1[/VALUES ?phenotype/]
  bind1-->v4
  bind10([obo:HP_0012311])
  bind10 --> bind1
  v5 --"biolink:subject"-->  v6
  v5 --"biolink:object"-->  v4
  v6 --"biolink:category"-->  c4
  v6 --"rdfs:label"-->  v7
  v5 --"biolink:has_supporting_studies"-->  v8
  v8 --"biolink:has_study_results"-->  v9
  v9 --"biolink:category"-->  c8
  v9 --"biolink:supporting_data_set"-->  v10
  v8 --"biolink:has_study_results"-->  v11
  v11 --"biolink:category"-->  c10
  v11 --"biolink:log_odds_ratio"-->  v12
  v11 --"biolink:total_sample_size"-->  v13
  v11 --"biolink:log_odds_ratio_95_ci"-->  v2
  v11 --"biolink:log_odds_ratio_95_ci"-->  v3
  bind2[/"if(?lower_ci > 0,?lower_ci,if(?upper_ci < 0,numeric-abs(?upper_ci),0))"/]
  v2 --o bind2
  v3 --o bind2
  bind2 --as--o v14
Get OARD disease-phenotype associations that are corroborated by an Ubergraph (disease)-[disease has feature]->(phenotypic feature) edge
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mondo:   <http://purl.obolibrary.org/obo/MONDO_>
PREFIX ro:      <http://purl.obolibrary.org/obo/RO_>

# OARD phenotype associations for one disease, restricted to phenotypes that are
# also linked to that disease by the confirming (Ubergraph) edge.
SELECT DISTINCT
    ?phenotype ?phenotype_label ?oard_predicate ?data_set
    ?log_odds ?ci1 ?ci2 ?confirming_edge
WHERE {
    # Inputs: the disease of interest and the relationship used as confirmation
    VALUES ?disease         { mondo:0015691 }
    VALUES ?confirming_edge { ro:0004029 }

    # Inclusion condition: the disease must point directly at the phenotype
    # through the confirming edge
    ?disease ?confirming_edge ?phenotype .

    # OARD association node for the same disease/phenotype pair,
    # together with the study that supports it
    ?assoc biolink:subject   ?disease ;
           biolink:predicate ?oard_predicate ;
           biolink:object    ?phenotype ;
           biolink:has_supporting_studies ?study .

    # Phenotype typing and label. Both OARD and Ubergraph carry a label for the
    # phenotype; keeping only the "en"-tagged one helps select a single label.
    ?phenotype biolink:category biolink:PhenotypicFeature ;
               rdfs:label       ?phenotype_label .
    FILTER(lang(?phenotype_label) = "en")

    # The study exposes two result nodes: counts (source of the data set) and log odds
    ?study biolink:has_study_results ?results_counts , ?results_log_odds .

    ?results_counts biolink:category            biolink:ConceptCountAnalysisResult ;
                    biolink:supporting_data_set ?data_set .

    # The two CI values are unordered in the data; the FILTER keeps ?ci1 as the smaller
    ?results_log_odds biolink:category             biolink:LogOddsAnalysisResult ;
                      biolink:log_odds_ratio       ?log_odds ;
                      biolink:log_odds_ratio_95_ci ?ci1 , ?ci2 .
    FILTER(?ci1 < ?ci2)
}
ORDER BY DESC(?log_odds)
LIMIT 100
graph TD
%% Query-structure diagram: green = projected variable, yellow = IRI constant
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
  v7("?assoc")
  v2("?ci1"):::projected 
  v3("?ci2"):::projected 
  v6("?confirming_edge"):::projected 
  v12("?data_set"):::projected 
  v5("?disease")
  v1("?log_odds"):::projected 
  v8("?oard_predicate"):::projected 
  v9("?phenotype"):::projected 
  v4("?phenotype_label"):::projected 
  v11("?results_counts")
  v13("?results_log_odds")
  v10("?study")
  c6([biolink:PhenotypicFeature]):::iri 
  c10([biolink:ConceptCountAnalysisResult]):::iri 
  c12([biolink:LogOddsAnalysisResult]):::iri 
  f0[["?ci1 < ?ci2"]]
  f0 --> v2
  f0 --> v3
  %% The query filters on the label's language tag, not on the label value itself
  f1[["lang(?phenotype_label) = 'en'"]]
  f1 --> v4
  bind2[/VALUES ?disease/]
  bind2-->v5
  bind20([obo:MONDO_0015691])
  bind20 --> bind2
  bind3[/VALUES ?confirming_edge/]
  bind3-->v6
  bind30([obo:RO_0004029])
  bind30 --> bind3
  v7 --"biolink:subject"-->  v5
  v7 --"biolink:predicate"-->  v8
  v7 --"biolink:object"-->  v9
  v9 --"biolink:category"-->  c6
  v9 --"rdfs:label"-->  v4
  v5 -->v6--> v9
  v7 --"biolink:has_supporting_studies"-->  v10
  v10 --"biolink:has_study_results"-->  v11
  v11 --"biolink:category"-->  c10
  v11 --"biolink:supporting_data_set"-->  v12
  v10 --"biolink:has_study_results"-->  v13
  v13 --"biolink:category"-->  c12
  v13 --"biolink:log_odds_ratio"-->  v1
  v13 --"biolink:log_odds_ratio_95_ci"-->  v2
  v13 --"biolink:log_odds_ratio_95_ci"-->  v3
Get phenotypes most strongly associated with a disease (Marfan syndrome)
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mondo: <http://purl.obolibrary.org/obo/MONDO_>

# Rank the phenotypes associated with one disease by a conservative measure of
# association strength (distance from 0 of the nearest 95% CI bound of the log odds).
SELECT ?phenotype ?phenotype_label ?data_set ?pair_count ?log_odds ?lower_ci ?upper_ci
WHERE {
    VALUES ?disease {mondo:0007947}  # Marfan syndrome

    # Find associated phenotypes
    ?assoc biolink:subject ?disease ;
           biolink:object ?phenotype .
    ?phenotype biolink:category biolink:PhenotypicFeature ;
               rdfs:label ?phenotype_label .

    # Get study
    ?assoc biolink:has_supporting_studies ?study .

    # Get Data Set (currently buried in Count Result. TODO: move to Study node)
    ?study biolink:has_study_results ?results_counts .
    ?results_counts biolink:category biolink:ConceptCountAnalysisResult ;
                    biolink:supporting_data_set ?data_set .

    # Get Log Odds Analysis Result
    ?study biolink:has_study_results ?results_log_odds .
    ?results_log_odds biolink:category biolink:LogOddsAnalysisResult ;
                      biolink:log_odds_ratio ?log_odds ;
                      biolink:total_sample_size ?pair_count .

    # Log odds confidence interval is represented as two unordered values;
    # the FILTER keeps only the binding where ?lower_ci is the smaller one.
    ?results_log_odds biolink:log_odds_ratio_95_ci ?lower_ci , ?upper_ci .
    FILTER(?lower_ci < ?upper_ci)

    # Association strength with high confidence = distance from 0 to the CI bound
    # closest to 0. An interval that contains 0 cannot be told apart from
    # "no association", so it scores 0 instead of the magnitude of one of its bounds.
    BIND(IF(?lower_ci > 0, ?lower_ci,
            IF(?upper_ci < 0, ABS(?upper_ci), 0)) AS ?dist_to_zero)
}
ORDER BY DESC(?dist_to_zero)
LIMIT 100
graph TD
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
  v5("?assoc")
  v10("?data_set"):::projected 
  v4("?disease")
  v14("?dist_to_zero")
  v12("?log_odds"):::projected 
  v2("?lower_ci"):::projected 
  v13("?pair_count"):::projected 
  v6("?phenotype"):::projected 
  v7("?phenotype_label"):::projected 
  v9("?results_counts")
  v11("?results_log_odds")
  v8("?study")
  v3("?upper_ci"):::projected 
  c4([biolink:PhenotypicFeature]):::iri 
  c8([biolink:ConceptCountAnalysisResult]):::iri 
  c10([biolink:LogOddsAnalysisResult]):::iri 
  f0[["?lower_ci < ?upper_ci"]]
  f0 --> v2
  f0 --> v3
  bind1[/VALUES ?disease/]
  bind1-->v4
  bind10([obo:MONDO_0007947])
  bind10 --> bind1
  v5 --"biolink:subject"-->  v4
  v5 --"biolink:object"-->  v6
  v6 --"biolink:category"-->  c4
  v6 --"rdfs:label"-->  v7
  v5 --"biolink:has_supporting_studies"-->  v8
  v8 --"biolink:has_study_results"-->  v9
  v9 --"biolink:category"-->  c8
  v9 --"biolink:supporting_data_set"-->  v10
  v8 --"biolink:has_study_results"-->  v11
  v11 --"biolink:category"-->  c10
  v11 --"biolink:log_odds_ratio"-->  v12
  v11 --"biolink:total_sample_size"-->  v13
  v11 --"biolink:log_odds_ratio_95_ci"-->  v2
  v11 --"biolink:log_odds_ratio_95_ci"-->  v3
  bind2[/"if(?lower_ci > 0,?lower_ci,if(?upper_ci < 0,numeric-abs(?upper_ci),0))"/]
  v2 --o bind2
  v3 --o bind2
  bind2 --as--o v14
Get diseases most strongly associated with a list of phenotypes
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX hp: <http://purl.obolibrary.org/obo/HP_>

# Score each disease by summing, over the input phenotypes it is associated with,
# the best (maximum) log odds ratio found for that disease-phenotype pair.
SELECT ?disease ?disease_label
       (SUM(?best_pair_score) AS ?total_score)
       (GROUP_CONCAT(DISTINCT ?pheno_id; SEPARATOR=", ") AS ?matched_pheno_ids)
       (GROUP_CONCAT(DISTINCT ?pheno_label; SEPARATOR="|") AS ?matched_pheno_labels)
WHERE {
  {
    # INNER QUERY: Find the strongest result per Disease-Phenotype pair.
    # Grouping is on the pair only, so it yields exactly one row per pair; grouping
    # on the label as well would emit one row per distinct label of the phenotype
    # and the outer SUM would count the same pair score several times.
    SELECT ?disease ?phenotype (STR(?phenotype) AS ?pheno_id)
           (SAMPLE(?any_pheno_label) AS ?pheno_label)
           (MAX(?log_odds) AS ?best_pair_score)
    WHERE {
      # Input: Your list of Phenotypes
      VALUES ?phenotype { hp:0012771 hp:0001166 hp:0001657 }

      # Match Association path
      ?assoc biolink:object ?phenotype ;
             biolink:subject ?disease .

      ?disease biolink:category biolink:Disease .

      # Collect every phenotype label; SAMPLE above keeps one of them per pair
      # (left unbound when the phenotype has no label)
      OPTIONAL {
        ?phenotype rdfs:label ?any_pheno_label .
      }

      # Navigate to stats
      ?assoc biolink:has_supporting_studies/biolink:has_study_results ?stats .
      ?stats biolink:category biolink:LogOddsAnalysisResult ;
             biolink:log_odds_ratio ?log_odds .
    }
    GROUP BY ?disease ?phenotype
  }

  # Pull the disease label for the final output.
  # NOTE: a disease with several labels yields one output row per label.
  OPTIONAL {
    ?disease rdfs:label ?disease_label .
  }
}
GROUP BY ?disease ?disease_label
ORDER BY DESC(?total_score)
LIMIT 100
graph TD
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
  v5("?any_pheno_label")
  v3("?assoc")
  v9("?best_pair_score"):::projected 
  v4("?disease"):::projected 
  v10("?disease_label"):::projected 
  v7("?log_odds")
  v11("?matched_pheno_ids"):::projected 
  v12("?matched_pheno_labels"):::projected 
  v8("?pheno_id"):::projected 
  v13("?pheno_label"):::projected 
  v2("?phenotype")
  v6("?stats")
  v14("?total_score"):::projected 
  a1((" "))
  c4([biolink:Disease]):::iri 
  c8([biolink:LogOddsAnalysisResult]):::iri 
  bind0[/VALUES ?phenotype/]
  bind0-->v2
  bind00([obo:HP_0012771])
  bind00 --> bind0
  bind01([obo:HP_0001166])
  bind01 --> bind0
  bind02([obo:HP_0001657])
  bind02 --> bind0
  v3 --"biolink:object"-->  v2
  v3 --"biolink:subject"-->  v4
  v4 --"biolink:category"-->  c4
  subgraph optional0["(optional)"]
  style optional0 fill:#bbf,stroke-dasharray: 5 5;
    v2 -."rdfs:label".->  v5
  end
  v3 --"biolink:has_supporting_studies"-->  a1
  a1 --"biolink:has_study_results"-->  v6
  v6 --"biolink:category"-->  c8
  v6 --"biolink:log_odds_ratio"-->  v7
  bind2[/"str(?phenotype)"/]
  v2 --o bind2
  bind2 --as--o v8
  bind3[/"max(?log_odds)"/]
  v7 --o bind3
  bind3 --as--o v9
  bind4[/"sample(?any_pheno_label)"/]
  v5 --o bind4
  bind4 --as--o v13
  subgraph optional1["(optional)"]
  style optional1 fill:#bbf,stroke-dasharray: 5 5;
    v4 -."rdfs:label".->  v10
  end
  bind7[/"sum(?best_pair_score)"/]
  v9 --o bind7
  bind7 --as--o v14
  bind8[/"?pheno_id"/]
  v8 --o bind8
  bind8 --as--o v11
  bind9[/"?pheno_label"/]
  v13 --o bind9
  bind9 --as--o v12
SPARQL Endpoint https://frink.apps.renci.org/oard-kg/sparql
Triple Pattern Fragments https://frink.apps.renci.org/ldf/oard-kg
Property | Triples