Shell
Tricks
# Create a daemon: the inner `&` backgrounds dropboxd inside a subshell,
# and the subshell itself is backgrounded as well.
($HOME/.dropbox-dist/dropboxd &)&
# Test whether a command exists (`command -v` succeeds only if gcc is found).
if command -v gcc >/dev/null 2>&1 ; then
echo "command is better than which in this case."
fi
# Test whether a variable starts with a given prefix (bash `[[ ]]` glob match).
[[ $HOME == /home* ]] && echo "Test var start with prefix"
# The same prefix test written with `case` pattern matching.
case $HOME in /home*) echo "or a more portbable way" ;; esac
Bash String tricks
echo ${HOME/home/C} # replace only the first match
echo ${HOME//\//\\} # replace every "/" separator with "\"
sed 's#^home#HOME#' <<< "home here" # here-string: the text is fed to sed on stdin
In bash only, `$'...'` is a good way to write escapes such as "\t" and "\n"
together: echo $'Name\tAge\n"Bob"\t24\nMary\t36'
See more under ANSI-C Quoting in the bash manual.
One line command
-
Using
ssh server "timedatectl set-time $(date +%H:%M:%S)"
to manually set the server’s time. -
加解密tar文件:
tar -czf - * | openssl enc -e -aes256 -out - | openssl enc -d -aes256 -in - | tar xz -C test
- 使用convert将图片缩小并裁剪至合适的尺寸以进行进一步的机器学习
find . -name '*png' -size +100k -exec convert {} -resize 256x256^ -gravity Center -extent 256x256 samller/{}.jpg \;
- Replace content within a pattern-delimited range; used here to turn the leading tab of every line in the YAML header into a blank.
find . -name '*.md' -exec sed -i -e '1,/^---/ s/^\t/ /' {} \;
-
Check port usage:
sudo lsof -i -P -n
- Generate Gif animation with bunch of png files
# First generate a color palette from the frames.
ffmpeg -pattern_type glob -i 'animation*.png' -vf palettegen palette.png
# Then create the GIF from the same frames using that palette.
ffmpeg -framerate 12 -pattern_type glob -i 'animation*.png' -i palette.png -lavfi paletteuse animation.gif
R
link R with other program
# Drive an external program (pdflatex) from R: commands are written to its
# stdin through a pipe, and its output is read back through a named pipe.

# Setup
system('mkfifo output.fifo')
p_out <- fifo('output.fifo', 'r')
# pipe() hands the command to sh, where the bash-only `&>` would background
# the command instead of redirecting stderr; use the portable `> file 2>&1`.
p_in <- pipe('pdflatex > output.fifo 2>&1', 'w')
# See what TeX said on startup
readLines(p_out)
readLines(p_out)
# Once TeX has nothing more to say, readLines() returns character(0)
# Tell TeX to do something
writeLines('\\documentclass{article}', p_in)
flush(p_in)
# See what it said in response
readLines(p_out)
close(p_out)
close(p_in)
# Remove the FIFO without spawning another shell
unlink('output.fifo')
reinstall all package after upgrade
# Collect the names of every package found in the old library tree
old_lib <- "~/R/x86_64-pc-linux-gnu-library/3.2/"
pkg_names <- unname(installed.packages(old_lib)[, "Package"])
# Install them again
install.packages(pkg_names)
rare functions
# Install an old version of a package from the CRAN archive
install.packages("https://cran.r-project.org/src/contrib/Archive/VGAM/VGAM_1.0-3.tar.gz", repos=NULL, type="source")
# Fill in missing values automatically
zoo::na.locf()
tidyr::fill()
stringi::stri_trans_totitle() # capitalize the first letter
as.Date(paste(2014, df$Week, 1, sep="-"), "%Y-%U-%u") # convert a week number to a date
# Fetch an unexported function from a package, modify it, and put it back (see also `trace`)
ParamHelpers:::addOptPathEl.OptPathDF # the triple colon `:::` gives access to objects a package does not export
getFromNamespace("checkNamed", ns = "checkmate") # fix the Chinese colname problem
# Replace checkmate's internal `checkNamed`: for type "strict", names that
# are non-missing and already equal to their make.names() form pass directly;
# every other case is delegated to checkmate::checkNames().
assignInNamespace("checkNamed", ns = "checkmate", function(x, type = "named") {
  nm <- names(x)
  # `&&` needs length-one operands (an error since R 4.3), so the per-name NA
  # test is collapsed with anyNA(), and a NULL `names(x)` is ruled out first.
  if (type == "strict" && !is.null(nm) && !anyNA(nm) &&
      all(make.names(nm, unique = TRUE) == nm)) {
    TRUE
  } else {
    checkmate::checkNames(nm, type)
  }
})
h5::selectDataSpace 选择器函数
selectDataSpace(.Object, offset, count)
selectDataSpace(.Object, elem)
# .Object: the dataset to read from
# offset: vector with one entry per dataset dimension; the start position along each dimension
# count: vector with one entry per dataset dimension; the number of elements to take along each dimension, starting at offset
# elem: matrix with one column per dataset dimension; each row gives the coordinates of a single value to select
ggplot
获取12306数据
# Query a train's stop list from 12306 and show the `data$data` element of
# the JSON reply in the data viewer.
query_url <- paste0(
  'https://kyfw.12306.cn/otn/czxx/queryByTrainNo?',
  'train_no=86000T660600&',
  'from_station_telecode=JGJ&',
  'to_station_telecode=WUJ&',
  'depart_date=2016-10-05'
)

# Variant 1: httr, with global options that disable SSL CA verification
httr::set_config(httr::config(ssl_verifypeer = 0L, timeout_ms = 10e3))
reply <- httr::content(httr::GET(query_url), as = "text")
View(jsonlite::fromJSON(reply)$data$data)

# Variant 2: RCurl, with peer verification disabled for this call only
reply <- RCurl::getURL(query_url, .opts = list(ssl.verifypeer = FALSE), crlf = TRUE)
View(jsonlite::fromJSON(reply)$data$data)
ROracle
library(ROracle)
# Connection settings. The host must be a character string: a bare
# 127.0.0.1 is not a valid R expression.
oracle_host <- "127.0.0.1"
oracle_port <- 550
oracle_svc <- "db"
# Oracle connect descriptor, assembled with base paste0() so that no extra
# package has to be attached.
connect.string <- paste0(
  "(DESCRIPTION=",
  "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=", oracle_host, ")(PORT=", oracle_port, ")))",
  "(CONNECT_DATA=(SERVICE_NAME=", oracle_svc, ")(SERVER = DEDICATED))",
  ")"
)
drv <- dbDriver("Oracle")
con <- dbConnect(drv, username="oper", password="gjtestpwd", dbname=connect.string)
# Column information of the ALL_TAB_COLUMNS table
tab_columns <- dbGetQuery(
  con,
  "SELECT COLUMN_NAME, DATA_TYPE FROM ALL_TAB_COLUMNS WHERE TABLE_NAME='ALL_TAB_COLUMNS'"
)
# RAW and LONG columns cannot be converted to R and SELECTing them raises an
# error, so they are dropped from the listing.
tab_columns[!tab_columns$DATA_TYPE %in% c("RAW", "LONG"), ]
dbDisconnect(con)
Latex
常使用的包
\usepackage{array,color} % formulas and tables
\usepackage{float} % pin a table or figure in place; use as \begin{table}[H]
临时设置页边距,通常用于将长表格手动左移
\usepackage{chngpage}
\begin {table}[H]
\caption{This is a long table}
\begin{adjustwidth}{-3cm}{} % first argument adjusts the left margin; the second (right margin) may be left empty
\end{adjustwidth}
\end{table}
多行公式并分组编号
% Two equation groups inside one align; each split environment is one row.
% The alignment point `&` goes before the relation symbol so the relation
% keeps its normal spacing, and the last row of a split carries no `\\`
% (a trailing `\\` would add an empty row).
\begin{align}
\begin{split}
x &\equiv u-k_{c} \cos \alpha\\
y &\equiv s+k_{c} \sin \alpha\\
\Delta' &= \frac{\Delta}{\pi}
\end{split} \\
%
\begin{split}
\frac{1}{u-x +i ( u-x ) \tan \alpha} &= ( x+ \Delta' ) +i y
\end{split}
\end{align}
Mathematica
Functions
(*reorder an unordered list of points into a continuous curve*)
FindCurvePath;
ListCurvePathPlot;
(*dynamic plotting*)
DynamicModule;
Manipulate;
(*color palette, density plot, stream plot*)
colorMap = ColorData[{"SunsetColors", {min, max}}];
bar = BarLegend[{colorMap, {min,max}}];
DensityPlot[z, {x, y} \[Element] Rectangle[{x0,y0}, {x1,y1}],
ColorFunctionScaling -> False,
PlotLegends -> bar, ColorFunction -> colorMap]
StreamPlot[{x',y'}, StreamStyle -> White]
(*probability functions*)
TransformedDistribution[ (x - a)/b, x \[Distributed] CauchyDistribution[0, 1]]
PDF[%, x]
(*polynomial manipulation*)
MapThread[#1^#2 &, {RandomSample[{x, y}], RandomInteger[4, 2]}]
Together@Total[1/(%-z)] // Numerator(*combine over a common denominator*)
MonomialList[%^3, {x, y, z}](*split into monomials*)
Table[{i, j}, {i, 0, 4}, {j, 0, 4}] /.
CoefficientRules[%%^6, {x, y}] //.
{ {_, _} -> 0} // MatrixForm (*pretty-print the polynomial coefficients*)
(*collect all symbols*)
Cases[expr, z_Symbol :> z, {0, Infinity}] // Union
(*simplify under assumptions*)
Assuming[x > 0 && y > 0, FunctionExpand[Log[x y]]]
(*substitute the parameters first, then evaluate*)
Hold@f[Range[n]] /. {f -> Total, n -> 4} // ReleaseHold
(*import data*)
SemanticImport["data.frame.dat"]//Query[Select[#x>0&]]
ReadList["vector.dat", Number]
(*functional programming*)
Through[{f, g, h}[x]]=={f[x], g[x], h[x]}
Useful packages
(*put the packages into the directory opened by the following command*)
SystemOpen@FileNameJoin[{$UserBaseDirectory, "Applications"}]
Needs["MaTeX`"](*https://github.com/szhorvat/MaTeX*)
Needs["ColorBrewer`"](*https://github.com/wanglongqi/ColorBrewer*)
Needs["ErrorBarPlots`"]
Python
Deep dive to Python3
- How does Python load packages?
Python uses the built-in
site
module to set up sys.path
when the process is initialized. See more in the documentation of the site
module.
Useful packages:
- datashader/bokeh: massive data visualization tools
# Change the Jupyter theme
pip install --upgrade jupyterthemes
jt -t onedork -cellw 90% -lineh 120 -fs 16 -nfs 16 -tfs 18 -dfs 12 -ofs 10 -mathfs 120 -N -T
## This actually rewrites `~/.jupyter/custom/custom.css`; you may need to clear the cache from Chrome's developer tools
pip install git+https://github.com/scipy/scipy.git # install a module from its source repository
pandas, numpy, sklearn, …
x = pd.DataFrame([{'a':1, 'b':2}, {'a':3, 'b':4}]); print(x)
# a b
#0 1 2
#1 3 4
x.c = x.b+1 # Pitfall: unlike R, `c` here is only an attribute of x, not a new column
x.c[x.c>1] = np.nan; print(x)
# a b
#0 1 2
#1 3 4
x = pd.DataFrame([{'a': None, 'b':2}, {'a':3, 'b':4}]) # Pitfall: pandas may drop NaN values when aggregating
x.groupby(['a','b']).size().reset_index(name='counts') # count the rows in each group
# a b counts
#0 3.0 4 1
# Big pitfall: never use a mutable object as a default argument value. With
# `fd=BytesIO()` as the default, every call that omitted `fd` kept appending
# to the same shared buffer. A `None` sentinel gives each call a fresh one.
def write_wave_audio_file(fd=None, data: bytes=b'',
                          sample_rate: int=16000, sample_size: int=2, channel: int=1):
    """Write raw audio frames as WAVE data and return the target.

    Args:
        fd: Target handed to ``wave.open`` (a file name or a writable binary
            file object). When ``None``, a new ``io.BytesIO`` buffer is
            created for this call.
        data: Raw audio frames to write.
        sample_rate: Frame rate passed to ``setframerate``.
        sample_size: Sample width in bytes passed to ``setsampwidth``.
        channel: Number of channels passed to ``setnchannels``.

    Returns:
        ``fd`` (or the newly created buffer) after the data has been written.
    """
    import io
    import wave

    if fd is None:
        fd = io.BytesIO()
    with wave.open(fd, 'wb') as wf:
        wf.setnchannels(channel)
        wf.setsampwidth(sample_size)
        wf.setframerate(sample_rate)
        wf.writeframes(data)
    return fd
# Convert a Unicode string to ASCII, removing [accents](https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string) and the like
import unidecode
import unicodedata

if __name__ == '__main__':
    s = 'Ö François ° łl fdasf ?? \t?xxË 你好Κνωσός'
    # Standard library only: decompose, then keep the non-combining characters.
    decomposed = unicodedata.normalize('NFD', s)
    base_chars = [ch for ch in decomposed if not unicodedata.combining(ch)]
    print('build-in pkg:', ''.join(base_chars))
    # Third-party transliteration of the same string.
    print('hand-tuned pkg:', unidecode.unidecode(s))
    #build-in pkg: O Francois ° łl fdasf ?? ?xxE 你好Κνωσος
    #hand-tuned pkg: O Francois deg ll fdasf ?? ?xxE Ni Hao Knosos
pytorch
torch.save
会保存模型定义的所有源代码,只保存model.pt
是不够的,还需要保存对应的模型定义文件model.py
及相关的import文件
index_select
can be correctly back-propagated
import torch
import numpy as np

np.random.seed(0)


class myLinear(torch.nn.Linear):
    # Linear layer whose forward pass uses only selected rows of weight/bias
    # (picked with index_select) and returns an L1 loss against the matching
    # columns of the target.

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.loss_func = torch.nn.L1Loss()
        self.reset_parameters()

    def reset_parameters(self):
        # Draw from NumPy's global RNG: weight first, then bias.
        for param in (self.weight, self.bias):
            param.data = torch.tensor(np.random.rand(*param.shape))

    def forward(self, input, output):
        rows = torch.LongTensor([0, 3, 5, 0])
        picked_w = torch.index_select(self.weight, 0, rows)
        picked_b = torch.index_select(self.bias, 0, rows)
        prediction = torch.nn.functional.linear(input, picked_w, picked_b)
        target = torch.index_select(output, -1, rows)
        return self.loss_func(prediction, target)


io_shape = (10, 6)
n_in, n_out = io_shape
L = myLinear(in_features=n_in, out_features=n_out)
x = torch.tensor(np.random.rand(10, n_in), requires_grad=False)
y = torch.tensor(np.random.rand(10, n_out), requires_grad=False)
loss = L(x, y)
loss.backward()
print(np.around(L.weight.grad.data.numpy(), decimals=3))
# [[0.187 0.256 0.259 0.305 0.214 0.266 0.303 0.245 0.251 0.229]
# [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]
# [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]
# [0.093 0.128 0.129 0.153 0.107 0.133 0.152 0.123 0.125 0.114]
# [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]
# [0.093 0.128 0.129 0.153 0.107 0.133 0.152 0.123 0.125 0.114]]
snippets
pip install git+https://github.com/scipy/scipy.git # 从源代码仓库安装模块
pkg.__version__ # 显示模块版本
class A(ClassB):
    # Demonstrates the three underscore naming conventions; the explanation
    # of each convention is carried by the string literals themselves.

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._name = '_name、_name_、_name__: 单下划线开头,建议性的私有成员,不要在外部访问。'

    def __can_not_access_outside(self):
        print('__name、 __name_ :双下划线开头,强制的私有成员,但是你依然可以蛮横地在外部危险访问。')

    def __doc__(self):
        print('__name__:双下划线开头与结尾,特殊成员,与私有性质无关,例如__doc__。')
Html
generate a page break when print the webpage
<p style="page-break-after:always;"></p>
or
<p><!-- pagebreak --></p>
make <h1>
label aligned center
<h1 style="text-align:center;">
or in the css file
h1{ text-align:center; }
Extract the URL for each video link: $x("//*[contains(text(), 'Video')]/@href").forEach(i => console.log(i.value))
.
SQL
Oracle PL/SQL
- 永远不要使用小写的表名或字段名,不要使用关键字(像DAY,DATE这样的)作为字段名,会报
ORA-00904: "day": invalid identifier
错误或ORA-00942: table or view does not exist
错误。实在要使用的话一定在SQL中用"
原样包裹起来。
# connect with sqlplus
export NLS_LANG="SIMPLIFIED CHINESE_CHINA.AL32UTF8"
sqlplus $USERNAME/$PASSWORD@$HOST:$PORT/$SERVICE
-- Every statement ends with its own terminator, and comments sit on their
-- own lines because SQL*Plus does not accept text after a terminator.

-- list all tables
SELECT * FROM ALL_TABLES;
-- show a table's structure
SELECT COLUMN_NAME, DATA_TYPE FROM ALL_TAB_COLUMNS WHERE TABLE_NAME='ALL_TAB_COLUMNS';
-- list all constraints
SELECT * FROM ALL_CONSTRAINTS;
/* all primary-key (P) and foreign-key (R) constraints */
SELECT A.CONSTRAINT_NAME AS X, A.TABLE_NAME, B.CONSTRAINT_NAME
FROM ALL_CONSTRAINTS A, ALL_CONSTRAINTS B
WHERE A.CONSTRAINT_TYPE = 'R'
AND B.CONSTRAINT_TYPE = 'P'
AND A.R_CONSTRAINT_NAME = B.CONSTRAINT_NAME;
-- take the first 10000 rows
SELECT * FROM TABLE_NAME WHERE ROWNUM <= 10000;
-- randomly sample 1% of the rows
SELECT * FROM TABLE_NAME SAMPLE(1);
-- concatenate column values within each group
SELECT ID, LISTAGG(HSCODE, ',') WITHIN GROUP (ORDER BY ID) AS HS
FROM TB
GROUP BY ID;
NOSQL
mongoDB
// # run a mongoDB server
// $docker run -d --name mongo -p 6017:27017 -v /media/mongodb:/data/db mongo:3.6 --auth --storageEngine wiredTiger
// $sudo chcon -Rt svirt_sandbox_file_t /media/mongodb
//
// # set it up ...
// $docker exec -it mongo mongo
// Create a user with the `root` role on the admin database, then authenticate as it.
use admin
db.createUser({ user: 'root', pwd: 'root_passwd', roles: [ { role: 'root', db: 'admin' } ] });
db.auth('root', 'root_passwd')
show users
// Create a regular user with the `readWriteAnyDatabase` role.
use admin
db.createUser({ user: 'nick', pwd: 'normal_user_passwd', roles: [ { role: 'readWriteAnyDatabase', db: 'admin' } ] });
// List the databases and the collections of the current database.
show dbs
show collections
JAVA Tips:
- Use command
find . -type f -name '*.jar' |xargs -i -t jar tvf {}
to search for all class bytecode.
Golang
Proxy for offline package installing
One can go get
packages on a computer with internet access; the modules will then be cached under $GOPATH/pkg/mod/cache
. Just copy those files onto the restricted computer and serve them from a web server with a command like python3 -m http.server 8000
. Then configure GOPROXY
with a command like go env -w GOPROXY=http://localhost:8000/download,direct
and enable the module feature with go env -w GO111MODULE=on
. You are then free to go get
packages there.
Following code show useful golang packages:
# Toolchain locations
export GOROOT=/path/to/go
export GOPATH=$GOROOT/gopath
export PATH=$GOPATH/bin:$GOROOT/bin:$PATH
# Fetch command: checksum database off, insecure transports allowed
GOGET="env GOSUMDB=off go get --insecure"
# Fetch each tool in turn, in the listed order
for pkg in \
    github.com/mdempsky/gocode \
    github.com/uudashr/gopkgs/v2/cmd/gopkgs \
    github.com/ramya-rao-a/go-outline \
    github.com/acroca/go-symbols \
    golang.org/x/tools/cmd/guru \
    golang.org/x/tools/cmd/gorename \
    github.com/fatih/gomodifytags \
    github.com/josharian/impl \
    github.com/davidrjenni/reftools/cmd/fillstruct \
    github.com/haya14busa/goplay/cmd/goplay \
    github.com/godoctor/godoctor \
    github.com/go-delve/delve/cmd/dlv \
    github.com/stamblerre/gocode \
    github.com/rogpeppe/godef \
    github.com/sqs/goreturns \
    golang.org/x/lint/golint \
    github.com/cweill/gotests
do
    $GOGET "$pkg"
done
Under Windows CMD, use set PATH=%GOROOT%\bin;%PATH%
to set the path for go. For Windows Powershell, use $env:GOPATH = 'C:\go'
to set the environment for go.
For go module controlled by go.mod
one may encounter problem with the version of implicit imported module, which is painful in IDE like Goland: the code can compile but IDE complain about unknown revision
hence auto-complete not working.
One simple solution might be go mod vendor
to place all the imported module under vendor
directory but do not check-in it into repo.
Hexo
To extend syntax of Markdown under Hexo, registering a function to Hexo by add a file like following:
js themes/Wikitten/scripts/more-tags.js
// Register an `alert` tag with Hexo that wraps its arguments in a button link.
// NOTE(review): per the Hexo tag API, `args` is the array of arguments written
// after the tag name. Joining with spaces avoids the comma-separated output
// that implicit Array-to-string conversion produces for multi-word arguments.
hexo.extend.tag.register('alert', function (args) {
  return '<a class="btn">' + args.join(' ') + '</a>';
});
Then you can use tag { % alert args-for-this-tag %}
inside any markdown file. When Hexo start to generate the whole website, it will first load all scripts under scripts/
and use the function to translate the markdown file.