Skip to content

Commit

Permalink
arXiv-v1
Browse files Browse the repository at this point in the history
  • Loading branch information
smsharma committed Mar 13, 2024
1 parent fc73bac commit be6e3eb
Show file tree
Hide file tree
Showing 17 changed files with 227 additions and 79 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
124 changes: 124 additions & 0 deletions notebooks/xx_data_checks.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from astroquery.mast import Observations\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5\n",
"20\n"
]
}
],
"source": [
"proposal_id = 15354\n",
"\n",
"# Query MAST for the public HST observations of this proposal\n",
"obs_table = Observations.query_criteria(\n",
"    obs_collection=\"HST\",\n",
"    proposal_id=[f\"{proposal_id}\"],\n",
"    dataRights=\"PUBLIC\",\n",
")\n",
"\n",
"print(len(obs_table))\n",
"\n",
"# Get preview products\n",
"products = Observations.get_product_list(obs_table)\n",
"products = products[products[\"productType\"] == \"PREVIEW\"]\n",
"\n",
"# Keep previews whose filename contains `match` but not `exclude`\n",
"match = \"total\"\n",
"exclude = \"color\"\n",
"mask = np.array(\n",
"    [match in row[\"productFilename\"] and exclude not in row[\"productFilename\"] for row in products],\n",
"    dtype=bool,\n",
")\n",
"candidates = products[mask]\n",
"\n",
"seed = 42\n",
"n_max_images = 20\n",
"\n",
"# choice() samples WITH replacement by default, which returns the same product\n",
"# several times; draw without replacement and never ask for more rows than exist.\n",
"n_images = min(n_max_images, len(candidates))\n",
"sample_idx = np.random.RandomState(seed=seed).choice(np.arange(len(candidates)), n_images, replace=False)\n",
"products = candidates[sample_idx]\n",
"\n",
"print(len(products))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO: 19 of 20 products were duplicates. Only downloading 1 unique product(s). [astroquery.mast.observations]\n",
"Downloading URL https://mast.stsci.edu/api/v0.1/Download/file?uri=mast:HST/product/hst_15354_01_acs_wfc_total_jdi601_drc.jpg to download_dir/mastDownload/HST/hst_15354_01_acs_wfc_total_jdi601/hst_15354_01_acs_wfc_total_jdi601_drc.jpg ... [Done]\n"
]
}
],
"source": [
"import os\n",
"\n",
"download_dir = 'download_dir'\n",
"image_suffixes = (\".jpg\", \".jpeg\")\n",
"\n",
"# Download the JPEG previews of the selected products into download_dir\n",
"Observations.download_products(\n",
"    products,\n",
"    extension=[\"jpg\", \"jpeg\"],\n",
"    productType=\"PREVIEW\",\n",
"    download_dir=download_dir,\n",
")\n",
"\n",
"# Walk download_dir recursively and move every JPEG up into download_dir itself\n",
"for parent, _subdirs, names in os.walk(download_dir):\n",
"    for name in names:\n",
"        if not name.endswith(image_suffixes):\n",
"            continue\n",
"\n",
"        nested_path = os.path.join(parent, name)\n",
"        flat_path = os.path.join(download_dir, name)\n",
"\n",
"        # Leave the file in place when that name is already taken at the top level\n",
"        if os.path.exists(flat_path):\n",
"            continue\n",
"        os.rename(nested_path, flat_path)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added paper/hubble.zip
Binary file not shown.
39 changes: 39 additions & 0 deletions paper/hubble_paperclip.bib
Original file line number Diff line number Diff line change
Expand Up @@ -464,4 +464,43 @@ @article{akhmetzhanova2024data
pages = {7459--7481},
year = {2024},
publisher = {Oxford University Press}
}

@article{Birk:2024knn,
author = {Birk, Joschka and Hallin, Anna and Kasieczka, Gregor},
title = {{OmniJet-$\alpha$: The first cross-task foundation model for particle physics}},
eprint = {2403.05618},
archiveprefix = {arXiv},
primaryclass = {hep-ph},
month = {3},
year = {2024}
}

@article{heinrich2024masked,
title = {Masked Particle Modeling on Sets: Towards Self-Supervised High Energy Physics Foundation Models},
author = {Heinrich, Lukas and Kagan, Michael and Klein, Samuel and Leigh, Matthew and Golling, Tobias and Raine, John Andrew and Osadchy, Margarita},
journal = {arXiv preprint arXiv:2401.13537},
year = {2024}
}

@article{mccabe2023multiple,
title = {Multiple physics pretraining for physical surrogate models},
 author = {McCabe, Michael and R{\'e}galdo-Saint Blancard, Bruno and Parker, Liam Holden and Ohana, Ruben and Cranmer, Miles and Bietti, Alberto and Eickenberg, Michael and Golkar, Siavash and Krawezik, G{\'e}raud and Lanusse, Fran{\c{c}}ois and others},
journal = {arXiv preprint arXiv:2310.02994},
year = {2023}
}

@article{vaswani2017attention,
title = {Attention is all you need},
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
journal = {Advances in neural information processing systems},
volume = {30},
year = {2017}
}

@article{dosovitskiy2020image,
title = {An image is worth 16x16 words: Transformers for image recognition at scale},
author = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and others},
journal = {arXiv preprint arXiv:2010.11929},
year = {2020}
}
Binary file modified paper/hubble_paperclip.pdf
Binary file not shown.
Loading

0 comments on commit be6e3eb

Please sign in to comment.