Examples

Handling large batches

 1"""In this example, we open a large FASTA file with thousands of sequences
 2(possibly millions), send it to the API in small batches, and save
 3the outputs (here, embeddings) to disk."""
 4
 5from pathlib import Path
 6import json
 7
 8from ginkgo_ai_client import GinkgoAIClient, MeanEmbeddingQuery
 9
10input_file = Path(__file__).parent / "data" / "100_dna_sequences.fasta"
11output_folder = Path(__file__).parent / "outputs" / "large_batches"
12output_folder.mkdir(parents=True, exist_ok=True)
13
14client = GinkgoAIClient()
15model = "ginkgo-maskedlm-3utr-v1"
16queries = MeanEmbeddingQuery.iter_from_fasta(input_file, model=model)
17for batch_result in client.send_requests_by_batches(queries, batch_size=10):
18    for query_result in batch_result:
19        with open(output_folder / f"{query_result.query_name}.json", "w") as f:
20            json.dump(query_result.dict(), f)

Examples by application

ESM model

 1"""In this example we compute embeddings and run masked inference
 2 with the ESM2 language model."""
 3
 4from ginkgo_ai_client import (
 5    GinkgoAIClient,
 6    MaskedInferenceQuery,
 7    MeanEmbeddingQuery,
 8)
 9
10client = GinkgoAIClient()
11model = "esm2-650M"
12
13# SIMPLE QUERY FOR EMBEDDING COMPUTATION
14
15query = MeanEmbeddingQuery(sequence="MLYLRRL", model=model)
16prediction = client.send_request(query)
17# prediction.embedding == [1.05, -2.34, ...]
18
19
20# SIMPLE QUERY FOR MASKED INFERENCE
21
22query = MaskedInferenceQuery(sequence="MLY<mask>RRL", model=model)
23prediction = client.send_request(query)
24# prediction.sequence == "MLYLRRL"  (for example; <mask> is replaced by a predicted residue)
25
26# BATCH REQUEST
27
28queries = [
29    MeanEmbeddingQuery(sequence=sequence, model=model)
30    for sequence in ["MLYLRRL", "MLL", "MLYLLRRL"]
31]
32predictions = client.send_batch_request(queries)
33# predictions[0].embedding == [1.05, -2.34, ...]

AA0 model

 1"""In this example we compute embeddings and run masked inference
 2 with the aa0 language model."""
 3
 4from ginkgo_ai_client import (
 5    GinkgoAIClient,
 6    MaskedInferenceQuery,
 7    MeanEmbeddingQuery,
 8)
 9
10client = GinkgoAIClient()
11model = "ginkgo-aa0-650M"
12
13# SIMPLE QUERY FOR EMBEDDING COMPUTATION
14
15query = MeanEmbeddingQuery(sequence="MLYLRRL", model=model)
16prediction = client.send_request(query)
17# prediction.embedding == [1.05, -2.34, ...]
18
19
20# SIMPLE QUERY FOR MASKED INFERENCE
21
22query = MaskedInferenceQuery(sequence="MLY<mask>RRL", model=model)
23prediction = client.send_request(query)
24# prediction.sequence == "MLYLRRL"  (for example; <mask> is replaced by a predicted residue)
25
26# BATCH REQUEST
27
28queries = [
29    MeanEmbeddingQuery(sequence=sequence, model=model)
30    for sequence in ["MLYLRRL", "MLL", "MLYLLRRL"]
31]
32predictions = client.send_batch_request(queries)
33# predictions[0].embedding == [1.05, -2.34, ...]

3’UTR model

 1"""In this example we compute embeddings and run masked inference
 2 with Ginkgo's 3'UTR language model."""
 3
 4from ginkgo_ai_client import (
 5    GinkgoAIClient,
 6    MaskedInferenceQuery,
 7    MeanEmbeddingQuery,
 8)
 9
10client = GinkgoAIClient()
11model = "ginkgo-maskedlm-3utr-v1"
12
13# SIMPLE QUERY FOR EMBEDDING COMPUTATION
14
15query = MeanEmbeddingQuery(sequence="ATTGCG", model=model)
16prediction = client.send_request(query)
17# prediction.embedding == [1.05, -2.34, ...]
18
19
20# SIMPLE QUERY FOR MASKED INFERENCE
21
22query = MaskedInferenceQuery(sequence="ATT<mask>TAC", model=model)
23prediction = client.send_request(query)
24
25# BATCH REQUEST
26
27queries = [
28    MeanEmbeddingQuery(sequence=sequence, model=model)
29    for sequence in ["AGCGC", "ATTGCG", "TACCGCA"]
30]
31predictions = client.send_batch_request(queries)
32# predictions[0].embedding == [1.05, -2.34, ...]

Promoter activity with Promoter-0

 1"""
 2This example shows how to use the PromoterActivityQuery to predict the activity of a
 3promoter in different tissues, based on the Borzoi model.
 4"""
 5
 6from pathlib import Path
 7from ginkgo_ai_client import GinkgoAIClient, PromoterActivityQuery
 8
 9client = GinkgoAIClient()
10orf_sequence = "tgccagccatctgttgtttgcc"
11promoter_sequence = "GTCCCACTGATGAACTGTGCT"
12
13
14query = PromoterActivityQuery(
15    promoter_sequence=promoter_sequence,
16    orf_sequence=orf_sequence,
17    source="expression",
18    tissue_of_interest={
19        "heart": ["CNhs10608+", "CNhs10612+"],
20        "liver": ["CNhs10608+", "CNhs10612+"],
21    },
22)
23
24response = client.send_request(query)
25print("Single-query response:", response)
26
27
28# In this next example we read the promoter sequences from a FASTA file and send
29# them in batches, writing the results to a JSONL file as they arrive.
30
31fasta_path = Path(__file__).parent / "data" / "100_dna_sequences.fasta"
32queries = PromoterActivityQuery.iter_with_promoter_from_fasta(
33    fasta_path=fasta_path,
34    orf_sequence=orf_sequence,
35    source="expression",
36    tissue_of_interest={
37        "heart": ["CNhs10608+", "CNhs10612+"],
38        "liver": ["CNhs10608+", "CNhs10612+"],
39    },
40)
41
42print("Now sending 100 requests, by batches of 10")
43print("Writing results to promoter_activity.jsonl...")
44output_file = Path(__file__).parent / "outputs" / "promoter_activity.jsonl"
45for batch_result in client.send_requests_by_batches(queries, batch_size=10):
46    for query_result in batch_result:
47        query_result.write_to_jsonl(output_file)

Boltz structure inference

Structure inference with a simple (single-chain) protein sequence:

 1"""Simple example where we predict the 3D structure of the GFP protein."""
 2
 3from ginkgo_ai_client import GinkgoAIClient, BoltzStructurePredictionQuery
 4
 5client = GinkgoAIClient()
 6sequence = (
 7    "MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTL"
 8    "VTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLV"
 9    "NRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLAD"
10    "HYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK"
11)
12
13query = BoltzStructurePredictionQuery.from_protein_sequence(sequence)
14response = client.send_request(query)
15response.download_structure("GFP.pdb")

Structure inference with a multimer protein sequence and ligand(s):

1"""We predict the structure of a multimer protein with ligand(s)."""
2
3from ginkgo_ai_client import GinkgoAIClient, BoltzStructurePredictionQuery
4
5client = GinkgoAIClient()
6query = BoltzStructurePredictionQuery.from_yaml_file("with_ligand.yaml")
7print("Sending the request, it might take a hot minute...")
8response = client.send_request(query, timeout=1000)
9response.download_structure("with_ligand.cif")