scientific-skills/glycoengineering/references/glycan_databases.md
import requests
def lookup_glytoucan(glytoucan_id: str, timeout: float = 30.0) -> dict:
    """Fetch glycan details from GlyTouCan.

    Args:
        glytoucan_id: GlyTouCan identifier, interpolated into the request
            URL path as-is.
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises a timeout error.

    Returns:
        The decoded JSON body when the response status is OK. An empty
        dict when the status is not OK or the body is not valid JSON.

    Raises:
        requests.RequestException: If the request itself fails (for
            example a connection error or a timeout).
    """
    url = f"https://api.glytoucan.org/glycan/{glytoucan_id}"
    # requests applies no timeout unless one is passed, so without this a
    # stalled server would block the caller indefinitely.
    response = requests.get(
        url,
        headers={"Accept": "application/json"},
        timeout=timeout,
    )
    if not response.ok:
        return {}
    try:
        return response.json()
    except ValueError:
        # JSON decode errors raised by Response.json() are ValueError
        # subclasses; treat an undecodable body like any other failed lookup.
        return {}
import requests
def get_glycoprotein_info(uniprot_id: str, timeout: float = 30.0) -> dict:
    """Get glycosylation data for a protein from GlyConnect.

    Args:
        uniprot_id: UniProt identifier, interpolated into the request URL
            path as-is.
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises a timeout error.

    Returns:
        The decoded JSON body when the response status is OK. An empty
        dict when the status is not OK or the body is not valid JSON.

    Raises:
        requests.RequestException: If the request itself fails (for
            example a connection error or a timeout).
    """
    base_url = "https://glyconnect.expasy.org/api"
    # requests applies no timeout unless one is passed, so without this a
    # stalled server would block the caller indefinitely.
    response = requests.get(
        f"{base_url}/proteins/uniprot/{uniprot_id}",
        timeout=timeout,
    )
    if not response.ok:
        return {}
    try:
        return response.json()
    except ValueError:
        # JSON decode errors raised by Response.json() are ValueError
        # subclasses; treat an undecodable body like any other failed lookup.
        return {}
def get_glycan_compositions(glyconnect_protein_id: int, timeout: float = 30.0) -> list:
    """Get all glycan compositions for a GlyConnect protein entry.

    Args:
        glyconnect_protein_id: GlyConnect protein identifier, interpolated
            into the request URL path.
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises a timeout error.

    Returns:
        The value stored under the ``"data"`` key of the decoded JSON body
        (an empty list if that key is absent). An empty list is also
        returned when the response status is not OK or the body is not
        valid JSON.

    Raises:
        requests.RequestException: If the request itself fails (for
            example a connection error or a timeout).
    """
    base_url = "https://glyconnect.expasy.org/api"
    # requests applies no timeout unless one is passed, so without this a
    # stalled server would block the caller indefinitely.
    response = requests.get(
        f"{base_url}/compositions/protein/{glyconnect_protein_id}",
        timeout=timeout,
    )
    if not response.ok:
        return []
    try:
        payload = response.json()
    except ValueError:
        # JSON decode errors raised by Response.json() are ValueError
        # subclasses; treat an undecodable body like any other failed lookup.
        return []
    return payload.get("data", [])
Shorthand codes that name common N-glycans (complex and high-mannose) as text strings:
G0F = Core-fucosylated, biantennary, no galactose
G1F = Core-fucosylated, one galactose
G2F = Core-fucosylated, two galactoses
G2FS1 = Core-fucosylated, two galactoses, one sialic acid
G2FS2 = Core-fucosylated, two galactoses, two sialic acids
M5 = High mannose 5 (Man5GlcNAc2)
M9 = High mannose 9 (Man9GlcNAc2)
Standard colored symbols for publications:
| Therapeutic | Target | Key Glycosylation | Function |
|---|---|---|---|
| IgG1 antibody | Various | N297 (Fc) | ADCC/CDC effector function |
| Erythropoietin | EPOR | N24, N38, N83, O-glycans | Pharmacokinetics |
| Etanercept | TNF | N420 (IgG1 Fc) | Half-life |
| tPA (alteplase) | Fibrin | N117, N184, N448 | Fibrin binding |
| Factor VIII | VWF | 25 N-glycosites | Clearance |
from glycoengineering_tools import find_n_glycosylation_sequons, predict_o_glycosylation_hotspots
import pandas as pd
def analyze_glycosylation_landscape(sequences_dict: dict) -> pd.DataFrame:
    """
    Batch analysis of glycosylation for multiple proteins.

    Args:
        sequences_dict: {protein_name: sequence}

    Returns:
        DataFrame with one row per protein and the columns ``protein``,
        ``length``, ``n_glycosites``, ``o_glyco_hotspots``,
        ``n_glyco_density`` (N-glycosites per 100 residues; 0.0 for an
        empty sequence) and ``n_glyco_positions`` (the ``'position'``
        value of each N-glycosite). The columns are present even when
        ``sequences_dict`` is empty.
    """
    # Fixing the column list keeps the schema stable for empty input;
    # otherwise pandas would build a frame with no columns at all.
    columns = [
        'protein',
        'length',
        'n_glycosites',
        'o_glyco_hotspots',
        'n_glyco_density',
        'n_glyco_positions',
    ]
    results = []
    for name, seq in sequences_dict.items():
        n_sites = find_n_glycosylation_sequons(seq)
        o_sites = predict_o_glycosylation_hotspots(seq)
        # An empty sequence has no length to normalise by; report zero
        # density instead of raising ZeroDivisionError.
        density = len(n_sites) / len(seq) * 100 if seq else 0.0
        results.append({
            'protein': name,
            'length': len(seq),
            'n_glycosites': len(n_sites),
            'o_glyco_hotspots': len(o_sites),
            'n_glyco_density': density,
            'n_glyco_positions': [s['position'] for s in n_sites],
        })
    return pd.DataFrame(results, columns=columns)