first commit
This commit is contained in:
.gitignoreSetParams.zipnohup.outpytest.inirequirements.txtrequirements_p37.txtsetparams.py
SetParams
SetParams_Demo.py__init__.pyapp
__init__.py
deploy.shdocker-compose.ymlconfigs
controller
core
exts.pyfile_tool.pyjson_util.pymodels
run.pyschemas
services
utils
DateTimeUtil.pyEncryptionTool.pyJSONEncodeTools.pyRedisMQTool.pySimpleSqlite3Tool.pySnowflakeTool.pyStandardizedOutput.pyUDPReceive.pyUDPSender.pyWebsocketClient.pyYamlTool.py__init__.pyredis_config.pywebsocket_tool.py
yolov5
.dockerignore.gitattributesdetect_server.pyexport.pyhubconf.py
.github
CODE_OF_CONDUCT.md
.gitignore.pre-commit-config.yamlCONTRIBUTING.mdLICENSEREADME.mdISSUE_TEMPLATE
PULL_REQUEST_TEMPLATE.mdREADME_cn.mdSECURITY.mddependabot.ymlworkflows
classify
data
Argoverse.yamlGlobalWheat2020.yamlImageNet.yamlObjects365.yamlSKU-110K.yamlVOC.yamlVisDrone.yamlcoco.yamlcoco128 - 副本.yamlcoco128.yaml
hyps
scripts
xView.yamldata_set
170111334447456256
models
__init__.pycommon.pyexperimental.py
train_server.pytutorial.ipynbhub
anchors.yamlyolov3-spp.yamlyolov3-tiny.yamlyolov3.yamlyolov5-bifpn.yamlyolov5-fpn.yamlyolov5-p2.yamlyolov5-p34.yamlyolov5-p6.yamlyolov5-p7.yamlyolov5-panet.yamlyolov5l6.yamlyolov5m6.yamlyolov5n6.yamlyolov5s-ghost.yamlyolov5s-transformer.yamlyolov5s6.yamlyolov5x6.yaml
tf.pyyolo.pyyolov5l.yamlyolov5m.yamlyolov5n.yamlyolov5s.yamlyolov5x.yamlutils
__init__.pyactivations.pyaugmentations.pyautoanchor.pyautobatch.py
val.pyvalidate_server.py圆孔_123_RODY_1_640.zipaws
benchmarks.pycallbacks.pydataloaders.pydocker
downloads.pyflask_rest_api
general.pygoogle_app_engine
loggers
loss.pymetrics.pyplots.pytorch_utils.pydocs
.back_flask_demo
flask_demo-backup-2022812170612.pdma.jsonflask_demo-backup-2022812170618.pdma.jsonflask_demo-backup-2022812170626.pdma.json
flask_demo.pdma.jsonindex.apibtests
0
app/yolov5/utils/aws/__init__.py
Normal file
0
app/yolov5/utils/aws/__init__.py
Normal file
26
app/yolov5/utils/aws/mime.sh
Normal file
26
app/yolov5/utils/aws/mime.sh
Normal file
@ -0,0 +1,26 @@
|
||||
# AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
# This script will run on every instance restart, not only on first start
# --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---

Content-Type: multipart/mixed; boundary="//"
MIME-Version: 1.0

--//
Content-Type: text/cloud-config; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="cloud-config.txt"

#cloud-config
# Part 1: schedule the scripts-user module with frequency "always" so the shell part below is
# executed on every boot rather than only on the first one.
cloud_final_modules:
- [scripts-user, always]

--//
Content-Type: text/x-shellscript; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="userdata.txt"

#!/bin/bash
# Part 2: the script that is run on each boot.
# --- paste contents of userdata.sh here ---
--//
|
40
app/yolov5/utils/aws/resume.py
Normal file
40
app/yolov5/utils/aws/resume.py
Normal file
@ -0,0 +1,40 @@
|
||||
# Resume all interrupted trainings in yolov5/ dir including DDP trainings
|
||||
# Usage: $ python utils/aws/resume.py
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import yaml
|
||||
|
||||
# Make the YOLOv5 root importable no matter which directory the script is launched from.
FILE = Path(__file__).resolve()  # absolute path of this script
ROOT = FILE.parents[2]  # YOLOv5 root directory (two levels above this file's folder)
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
|
||||
|
||||
def count_devices(device):
    """Return how many devices a saved ``--device`` value names.

    Args:
        device: The ``device`` entry read from ``opt.yaml``, e.g. ``'0,1'``, ``'cpu'`` or ``''``.
            ``None`` and non-string scalars (such as an unquoted ``0``) are accepted.

    Returns:
        The number of non-empty comma-separated entries; ``0`` when no device was specified.
    """
    text = '' if device is None else str(device)
    # ''.split(',') yields [''], so blank entries must be dropped or the count is never 0.
    return sum(1 for entry in text.split(',') if entry.strip())


def build_resume_command(last, nproc=1, port=0):
    """Build the shell command that resumes training from the checkpoint ``last``.

    Args:
        last: Path of the ``last.pt`` checkpoint to resume from.
        nproc: Number of processes to launch; values above 1 select the multi-GPU launcher.
        port: Value passed as ``--master_port`` (only used for multi-GPU launches).

    Returns:
        The command string, with output discarded and a trailing ``&`` so it runs in the background.
    """
    import shlex  # local import: only this helper needs it

    target = shlex.quote(str(last))  # keep paths with spaces/metacharacters as one shell word
    if nproc > 1:  # multi-GPU
        cmd = f'python -m torch.distributed.run --nproc_per_node {nproc} --master_port {port} train.py --resume {target}'
    else:  # single-GPU
        cmd = f'python train.py --resume {target}'
    return cmd + ' > /dev/null 2>&1 &'  # redirect output to /dev/null and run as a background job


def main():
    """Relaunch every interrupted training found below the current working directory."""
    port = 0  # --master_port, incremented so each multi-GPU run gets its own value
    path = Path('').resolve()
    for last in path.rglob('*/**/last.pt'):
        # NOTE(review): torch.load unpickles the file; this assumes the checkpoints are trusted
        # local training outputs. map_location='cpu' avoids restoring tensors onto a GPU just to
        # inspect one key.
        ckpt = torch.load(last, map_location='cpu')
        if ckpt['optimizer'] is None:
            continue  # no optimizer state stored, so there is nothing to resume

        # Load opt.yaml
        with open(last.parent.parent / 'opt.yaml', errors='ignore') as f:
            opt = yaml.safe_load(f)

        # Get device count; with no explicit device list fall back to every visible GPU
        nd = count_devices(opt['device']) or torch.cuda.device_count()
        ddp = nd > 1  # distributed data parallel

        if ddp:
            port += 1
        cmd = build_resume_command(last, nproc=nd if ddp else 1, port=port)
        print(cmd)
        os.system(cmd)


if __name__ == '__main__':
    main()
|
27
app/yolov5/utils/aws/userdata.sh
Normal file
27
app/yolov5/utils/aws/userdata.sh
Normal file
@ -0,0 +1,27 @@
|
||||
#!/bin/bash
# AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
# This script will run only once on first instance start (for a re-start script see mime.sh)
# /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
# Use >300 GB SSD

# Absolute path so the script does not depend on the directory it is launched from;
# stop instead of cloning/restarting in an unexpected location.
cd /home/ubuntu || exit 1
if [ ! -d yolov5 ]; then
  echo "Running first-time script." # install dependencies, download COCO, pull Docker
  git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5
  cd yolov5 || exit 1 # the clone failed, nothing below can work
  # The three setup tasks are independent, so run them in parallel as background jobs.
  bash data/scripts/get_coco.sh && echo "COCO done." &
  sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
  python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
  wait && echo "All tasks done." # finish background tasks
else
  echo "Running re-start script." # resume interrupted runs
  i=0
  list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
  while IFS= read -r id; do
    [ -n "$id" ] || continue # an empty list still feeds one blank line to the loop
    ((i++))
    echo "restarting container $i: $id"
    sudo docker start "$id"
    # sudo docker exec -it $id python train.py --resume # single-GPU
    sudo docker exec -d "$id" python utils/aws/resume.py # multi-scenario
  done <<<"$list"
fi
|
Reference in New Issue
Block a user