Shell
Tricks
# Start a program detached from the current shell: the inner `&` backgrounds
# dropboxd inside a subshell, and the outer `&` backgrounds that subshell too.
( "$HOME"/.dropbox-dist/dropboxd & ) &
# Test whether a command exists; `command -v` is a shell builtin.
if command -v gcc >/dev/null 2>&1; then
    echo "command is better than which in this case."
fi
# Test whether a variable starts with a prefix (bash `[[ ]]` glob match).
[[ $HOME == /home* ]] && echo "Test var start with prefix"
# The same prefix test written with `case`, which also works outside bash.
case $HOME in /home*) echo "or a more portbable way" ;; esac
Bash String tricks
echo "${HOME/home/C}"                # replace only the first match of "home" with "C"
echo "${HOME//\//\\}"                # replace every "/" separator with a backslash
sed 's#^home#HOME#' <<< "home here"  # here-string: pass a literal string on stdin
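Bash only: ANSI-C quoting ($'...') is a convenient way to write escapes such as "\t" and "\n"
together with literal double quotes: echo $'Name\tAge\n"Bob"\t24\nMary\t36'
. See the "ANSI-C Quoting" section of the Bash manual for more.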
One line command
Using ssh server "timedatectl set-time $(date +%H:%M:%S)"
to manually set the server’s time.
加解密tar文件: tar -czf - * | openssl enc -e -aes256 -out - | openssl enc -d -aes256 -in - | tar xz -C test
使用convert将图片缩小并裁剪至合适的尺寸以进行进一步的机器学习
# Resize every PNG larger than 100k so it covers 256x256, centre-crop it to
# exactly 256x256, and write it as a JPEG under samller/.
# NOTE(review): "samller" is kept as written (possibly a typo for "smaller").
find . -name '*png' -size +100k -exec convert {} -resize 256x256^ -gravity Center -extent 256x256 samller/{}.jpg \;
Replace content only between two patterns; used here to turn the leading tab of each line in the YAML header into a blank.
# For every *.md file, edit in place: on the lines from line 1 up to the next
# line that starts with "---", replace one leading tab with a space.
find . -name '*.md' -exec sed -i -e '1,/^---/ s/^\t/ /' {} \;
Check port usage: sudo lsof -i -P -n
Generate Gif animation with bunch of png files
# Step 1: run the palettegen filter over all animation*.png frames and save
# the resulting colour palette as palette.png.
ffmpeg -pattern_type glob -i 'animation*.png' -vf palettegen palette.png
# Step 2: read the same frames at 12 frames per second plus the palette, and
# apply the paletteuse filter to write animation.gif.
ffmpeg -framerate 12 -pattern_type glob -i 'animation*.png' -i palette.png -lavfi paletteuse animation.gif
R
link R with other program
# Setup
system ( 'mkfifo output.fifo' )
p_out <- fifo ( 'output.fifo' , 'r' )
p_in <- pipe ( 'pdflatex &> output.fifo' , 'w' )
# See what TeX said on startup
readLines ( p_out )
readLines ( p_out )
# TeX has nothing more to say but return character(0)
# Tell TeX to do something
writeLines ( '\\documentclass{article}' , p_in )
flush ( p_in )
# See what it said in response
readLines ( p_out )
close ( p_out )
close ( p_in )
system ( 'rm output.fifo' )
reinstall all package after upgrade
# Get currently installed packages
package_df <- as.data.frame ( installed.packages ( "~/R/x86_64-pc-linux-gnu-library/3.2/" ))
package_list <- as.character ( package_df $ Package )
# Re-install
install.packages ( package_list )
rare functions
# Install an old version of a package from its archived source tarball
install.packages ( "https://cran.r-project.org/src/contrib/Archive/VGAM/VGAM_1.0-3.tar.gz" , repos = NULL , type = "source" )
# Fill in missing values automatically
zoo :: na.locf ()
tidyr :: fill ()
stringi :: stri_trans_totitle () # capitalise the first letter
as.Date ( paste ( 2014 , df $ Week , 1 , sep = "-" ), "%Y-%U-%u" ) # convert a week number to a date
# Get a non-exported function from a package, modify it, and put it back (see also `trace`)
ParamHelpers ::: addOptPathEl.OptPathDF # the triple colon `:::` gives access to objects a package does not export
getFromNamespace ( "checkNamed" , ns = "checkmate" ) # fix the Chinese colname problem
# Replace checkmate's internal `checkNamed` with a version that, for
# type = "strict", accepts names which make.names() leaves unchanged, and
# otherwise defers to checkmate::checkNames().
assignInNamespace(
  "checkNamed",
  ns = "checkmate",
  value = function(x, type = "named") {
    nm <- names(x)
    # `&&` requires length-one operands (an error since R 4.3 otherwise), so
    # collapse the per-name NA test with anyNA() and guard against objects
    # that have no names at all.
    if (type == "strict" && !is.null(nm) && !anyNA(nm) &&
      all(make.names(nm, unique = TRUE) == nm)) {
      TRUE
    } else {
      checkmate::checkNames(nm, type)
    }
  }
)
h5::selectDataSpace 选择器函数
selectDataSpace ( .Object , offset , count )
selectDataSpace ( .Object , elem )
# .Object: the dataset to select from
# offset: vector with one entry per dataset dimension; the start position along each dimension
# count: vector with one entry per dataset dimension; the number of elements to take along each dimension, starting at offset
# elem: matrix with one column per dataset dimension; each row gives the coordinates of a single element to select
ggplot
获取12306数据
# httr variant: a global config switches off SSL peer verification and sets
# the request timeout (in milliseconds)
httr::set_config(httr::config(ssl_verifypeer = 0L, timeout_ms = 10e3))
httr::GET(
  paste0(
    'https://kyfw.12306.cn/otn/czxx/queryByTrainNo?',
    'train_no=86000T660600&',
    'from_station_telecode=JGJ&',
    'to_station_telecode=WUJ&',
    'depart_date=2016-10-05'
  )
) %>%
  httr::content(as = "text") %>%
  jsonlite::fromJSON() %>%
  (function(resp) resp$data$data) %>%  # keep only the nested data$data element
  View()
# RCurl variant: SSL peer verification is switched off per request
RCurl::getURL(
  paste0(
    'https://kyfw.12306.cn/otn/czxx/queryByTrainNo?',
    'train_no=86000T660600&',
    'from_station_telecode=JGJ&',
    'to_station_telecode=WUJ&',
    'depart_date=2016-10-05'
  ),
  .opts = list(ssl.verifypeer = FALSE),
  crlf = TRUE
) %>%
  jsonlite::fromJSON() %>%
  (function(resp) resp$data$data) %>%  # keep only the nested data$data element
  View()
ROracle
library(ROracle)

# Connection parameters. The host has to be a character string: a bare
# 127.0.0.1 is not valid R syntax.
oracle_host <- "127.0.0.1"
oracle_port <- 550
oracle_svc <- "db"

# Assemble the Oracle connect descriptor from the parameters above
connect.string <- paste0(
  "(DESCRIPTION=",
  "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=", oracle_host, ")(PORT=", oracle_port, ")))",
  "(CONNECT_DATA=(SERVICE_NAME=", oracle_svc, ")(SERVER = DEDICATED))",
  ")"
)

drv <- dbDriver("Oracle")
con <- dbConnect(drv, username = "oper", password = "gjtestpwd", dbname = connect.string)

# Fetch the column information of the ALL_TAB_COLUMNS table itself
dbGetQuery(
  con,
  "SELECT COLUMN_NAME, DATA_TYPE FROM ALL_TAB_COLUMNS WHERE TABLE_NAME='ALL_TAB_COLUMNS'"
) %>%
  # RAW and LONG columns cannot be converted to R; SELECTing them raises an error
  filter(!DATA_TYPE %in% c("RAW", "LONG"))

dbDisconnect(con)
Latex
常使用的包
\usepackage{array,color} % formulas and tables
\usepackage{float}       % pin a table or figure in place; use as \begin{table}[H]
临时设置页边距,通常用于将长表格手动左移
\usepackage{chngpage}
% The environment name must not contain spaces: "\begin { table}" would look
% for an environment called " table".
\begin{table}[H]
\caption{This is a long table}
\begin{adjustwidth}{-3cm}{} % first argument adjusts the left margin; the second (right margin) may be left empty
\end{adjustwidth}
\end{table}
多行公式并分组编号
% Two equation groups inside one align; each split block forms one group
\begin{align}
  \begin{split}
    x \equiv & u - k_{c}\cos\alpha \\
    y \equiv & s + k_{c}\sin\alpha \\
    \Delta' = & \frac{\Delta}{\pi} \\
  \end{split} \\
  %
  \begin{split}
    \frac{1}{u - x + i(u - x)\tan\alpha} = & (x + \Delta') + i y
  \end{split}
\end{align}
Mathematica
Functions
(* Reassemble an unordered list of points into a continuous curve *)
FindCurvePath;
ListCurvePathPlot;
(* Dynamic plotting *)
DynamicModule;
Manipulate;
(* Colour map, density plot, stream plot *)
colorMap = ColorData[{"SunsetColors", {min, max}}];
bar = BarLegend[{colorMap, {min, max}}];
DensityPlot[z, {x, y} \[Element] Rectangle[{x0, y0}, {x1, y1}],
 ColorFunctionScaling -> False,
 PlotLegends -> bar, ColorFunction -> colorMap]
StreamPlot[{x', y'}, StreamStyle -> White]
(* Probability functions *)
TransformedDistribution[(x - a)/b, x \[Distributed] CauchyDistribution[0, 1]]
PDF[%, x]
(* Polynomial manipulation *)
MapThread[#1^#2 &, {RandomSample[{x, y}], RandomInteger[4, 2]}]
Together@Total[1/(% - z)] // Numerator (* put over a common denominator *)
MonomialList[%^3, {x, y, z}] (* split into monomials *)
Table[{i, j}, {i, 0, 4}, {j, 0, 4}] /.
  CoefficientRules[%%^6, {x, y}] //.
 {{_, _} -> 0} // MatrixForm (* pretty-print the polynomial coefficients *)
(* Collect every symbol in an expression.
   The pattern must be written z_Symbol without spaces: "z _ Symbol" is read
   as a product of three factors, not as a pattern. *)
Cases[expr, z_Symbol :> z, {0, Infinity}] // Union
(* Simplify under assumptions *)
Assuming[x > 0 && y > 0, FunctionExpand[Log[x y]]]
(* Substitute parameters first, then evaluate *)
Hold@f[Range[n]] /. {f -> Total, n -> 4} // ReleaseHold
(* Import data *)
SemanticImport["data.frame.dat"] // Query[Select[#x > 0 &]]
ReadList["vector.dat", Number]
(* Functional programming *)
Through[{f, g, h}[x]] == {f[x], g[x], h[x]}
Useful packages
(* Put the packages into the directory opened by the following command *)
SystemOpen@FileNameJoin[{$UserBaseDirectory, "Applications"}]
Needs["MaTeX`"] (* https://github.com/szhorvat/MaTeX *)
Needs["ColorBrewer`"] (* https://github.com/wanglongqi/ColorBrewer *)
Needs["ErrorBarPlots`"]
Python
Deep dive to Python3
How does Python load packages?
Python uses the built-in site
module to set up sys.path
when the interpreter process initializes. See the documentation of the site
module for details.
Useful packages:
datashader/bokeh: visualization tools for massive data sets
# Change the Jupyter theme
pip install --upgrade jupyterthemes
jt -t onedork -cellw 90% -lineh 120 -fs 16 -nfs 16 -tfs 18 -dfs 12 -ofs 10 -mathfs 120 -N -T
## This actually modifies `~/.jupyter/custom/custom.css`; you may need to clear the cache from Chrome's developer tools
pip install git+https://github.com/scipy/scipy.git # install a module from its source repository
pandas, numpy, sklearn, …
# Pitfall 1: assigning to a new attribute does not add a column.
x = pd.DataFrame([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}])
print(x)
#    a  b
# 0  1  2
# 1  3  4
x.c = x.b + 1  # unlike in R, c is only an attribute of x here, not a new column
x.c[x.c > 1] = np.nan
print(x)
#    a  b
# 0  1  2
# 1  3  4

# Pitfall 2: pandas may drop NaN values when computing statistics.
x = pd.DataFrame([{'a': None, 'b': 2}, {'a': 3, 'b': 4}])
x.groupby(['a', 'b']).size().reset_index(name='counts')  # count the rows per group
#      a  b  counts
# 0  3.0  4       1
# Major pitfall: never use a mutable object as a default argument value. The
# wave-writing code below once had `fd=BytesIO()` as its default and took an
# afternoon to debug: without an explicit fd, every call kept appending data
# to the same shared BytesIO().
def write_wave_audio_file(fd=None,
                          data: bytes = b'',
                          sample_rate: int = 16000, sample_size: int = 2, channel: int = 1):
    """Write raw PCM frames as a WAV stream and return the target object.

    Args:
        fd: Target accepted by ``wave.open`` in ``'wb'`` mode. When ``None``
            (the default) a fresh ``io.BytesIO`` is created for this call, so
            no buffer is ever shared between calls.
        data: Raw audio frames to write.
        sample_rate: Frame rate in Hz.
        sample_size: Sample width in bytes.
        channel: Number of audio channels.

    Returns:
        The ``fd`` that was written to (the new buffer when ``fd`` was None).
    """
    if fd is None:
        from io import BytesIO
        fd = BytesIO()
    with wave.open(fd, 'wb') as wf:
        wf.setnchannels(channel)
        wf.setsampwidth(sample_size)
        wf.setframerate(sample_rate)
        wf.writeframes(data)
    return fd
# Convert a Unicode string to ASCII, stripping [accents](https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string) and the like
import unidecode
import unicodedata

if __name__ == '__main__':
    s = 'Ö François ° łl fdasf ?? \t ?xxË 你好Κνωσός'
    # Standard library only: decompose with NFD, then drop the combining marks
    print('build-in pkg:', ''.join(c for c in unicodedata.normalize('NFD', s) if not unicodedata.combining(c)))
    # Third-party unidecode: transliterate every character
    print('hand-tuned pkg:', unidecode.unidecode(s))
    #build-in pkg: O Francois ° łl fdasf ?? ?xxE 你好Κνωσος
    #hand-tuned pkg: O Francois deg ll fdasf ?? ?xxE Ni Hao Knosos
pytorch
torch.save
不会保存模型定义的源代码,因此只保存model.pt
是不够的,还需要保存对应的模型定义文件model.py
及相关的import文件
index_select
can be correctly back-propagated
import torch
import numpy as np

np.random.seed(0)


class myLinear(torch.nn.Linear):
    """Linear layer whose forward pass uses only selected rows and returns a loss.

    Demonstrates that gradients flow back through ``torch.index_select``: rows
    of the weight that are never selected receive a zero gradient.
    """

    def __init__(self, *args, **kwargs):
        super(myLinear, self).__init__(*args, **kwargs)
        self.loss_func = torch.nn.L1Loss()
        self.reset_parameters()

    def reset_parameters(self):
        # Draw weight and bias from numpy's seeded generator
        self.weight.data = torch.tensor(np.random.rand(*self.weight.shape))
        self.bias.data = torch.tensor(np.random.rand(*self.bias.shape))

    def forward(self, input, output):
        # Output rows to use; row 0 is selected twice
        index = torch.LongTensor([0, 3, 5, 0])
        w = torch.index_select(self.weight, 0, index)
        b = torch.index_select(self.bias, 0, index)
        out = torch.nn.functional.linear(input, w, b)
        # Compare against the matching columns of the target
        return self.loss_func(out, torch.index_select(output, -1, index))


io_shape = (10, 6)
L = myLinear(in_features=io_shape[0], out_features=io_shape[1])
x = torch.tensor(np.random.rand(10, io_shape[0]), requires_grad=False)
y = torch.tensor(np.random.rand(10, io_shape[1]), requires_grad=False)
loss = L(x, y)
loss.backward()
print(np.around(L.weight.grad.data.numpy(), decimals=3))
# [[0.187 0.256 0.259 0.305 0.214 0.266 0.303 0.245 0.251 0.229]
# [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]
# [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]
# [0.093 0.128 0.129 0.153 0.107 0.133 0.152 0.123 0.125 0.114]
# [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]
# [0.093 0.128 0.129 0.153 0.107 0.133 0.152 0.123 0.125 0.114]]
snippets
pip install git+https://github.com/scipy/scipy.git  # install a module from its source repository
pkg . __version__ # show the module's version
class A(ClassB):
    """Illustrates Python's underscore naming conventions for members."""

    def __init__(self, **kwargs):
        super(A, self).__init__(**kwargs)
        # Single leading underscore: private by convention only
        self._name = '_name、_name_、_name__: 单下划线开头,建议性的私有成员,不要在外部访问。'

    # Double leading underscore: enforced private member
    def __can_not_access_outside(self):
        print('__name、 __name_ :双下划线开头,强制的私有成员,但是你依然可以蛮横地在外部危险访问。')

    # Double leading and trailing underscore: special member, unrelated to privacy
    def __doc__(self):
        print('__name__:双下划线开头与结尾,特殊成员,与私有性质无关,例如__doc__。')
Html
Generate a page break when printing the web page
<p style="page-break-after:always;"></p>
or
<p><!-- pagebreak --></p>
Center-align an <h1>
heading
<h1 style="text-align:center;">
or in the css file
h1 { text-align: center; }
SQL
Oracle PL/SQL
永远不要使用小写的表名或字段名,不要使用关键字(像DAY,DATE这样的)作为字段名,会报ORA-00904: "day": invalid identifier
错误或ORA-00942: table or view does not exist
错误。实在要使用的话,一定要在SQL中用双引号 "
将其原样包裹起来。
# Connect with sqlplus. NLS_LANG sets the client language, territory and
# character set; shell assignments must have no spaces around `=`.
export NLS_LANG="SIMPLIFIED CHINESE_CHINA.AL32UTF8"
sqlplus "$USERNAME/$PASSWORD@$HOST:$PORT/$SERVICE"
SELECT * FROM ALL_TABLES; -- list all tables
SELECT COLUMN_NAME, DATA_TYPE FROM ALL_TAB_COLUMNS WHERE TABLE_NAME = 'ALL_TAB_COLUMNS'; -- show a table's structure
SELECT * FROM ALL_CONSTRAINTS; -- list all constraints
/* Every foreign-key (R) constraint together with the primary-key (P)
   constraint it references. Constraint names are only unique within a
   schema, so the join also has to match the referenced owner. */
SELECT A.CONSTRAINT_NAME AS X, A.TABLE_NAME, B.CONSTRAINT_NAME
  FROM ALL_CONSTRAINTS A, ALL_CONSTRAINTS B
 WHERE A.CONSTRAINT_TYPE = 'R'
   AND B.CONSTRAINT_TYPE = 'P'
   AND A.R_OWNER = B.OWNER
   AND A.R_CONSTRAINT_NAME = B.CONSTRAINT_NAME;
SELECT * FROM TABLE_NAME WHERE ROWNUM <= 10000; -- take the first 10000 rows
SELECT * FROM TABLE_NAME SAMPLE (1); -- randomly sample 1% of the data
-- Concatenate a column's values per group.
-- Order by the aggregated column: ordering by ID, which is constant within
-- each group, would leave the order of the concatenated values arbitrary.
SELECT ID, LISTAGG(HSCODE, ',') WITHIN GROUP (ORDER BY HSCODE) AS HS
  FROM TB
 GROUP BY ID;
NOSQL
mongoDB
// # run a mongoDB server
// $docker run -d --name mongo -p 6017:27017 -v /media/mongodb:/data/db mongo:3.6 --auth --storageEngine wiredTiger
// $sudo chcon -Rt svirt_sandbox_file_t /media/mongodb
//
// # set it up ...
// $docker exec -it mongo mongo
use admin
db . createUser ({ user : ' root ' , pwd : ' root_passwd ' , roles : [ { role : ' root ' , db : ' admin ' } ] });
db . auth ( ' root ' , ' root_passwd ' )
show users
use admin
db . createUser ({ user : ' nick ' , pwd : ' normal_user_passwd ' , roles : [ { role : ' readWriteAnyDatabase ' , db : ' admin ' } ] });
show dbs
show collections
JAVA Tips:
Use command find . -type f -name '*.jar' |xargs -i -t jar tvf {}
to list the class bytecode files inside every jar found under the current directory.
Golang
Proxy for offline package installing
One can go get
packages on a computer with internet access; the modules are then cached under $GOPATH/pkg/mod/cache
. Copy those files to the restricted computer and serve them from a web server, e.g. with python3 -m http.server 8000
. Then configure GOPROXY
with a command like go env -w GOPROXY=http://localhost:8000/download,direct
and enable the module feature with go env -w GO111MODULE=on
. After that, go get
works on the restricted computer as well.
The following commands install useful Go tooling packages:
# Go environment; shell assignments must have no spaces around `=`.
export GOROOT=/path/to/go
export GOPATH="$GOROOT/gopath"
export PATH="$GOPATH/bin:$GOROOT/bin:$PATH"
# Command prefix shared by every install below. It is expanded unquoted on
# purpose so that it word-splits into `env`, its variable and the go command.
GOGET="env GOSUMDB=off go get --insecure"
$GOGET github.com/mdempsky/gocode
$GOGET github.com/uudashr/gopkgs/v2/cmd/gopkgs
$GOGET github.com/ramya-rao-a/go-outline
$GOGET github.com/acroca/go-symbols
$GOGET golang.org/x/tools/cmd/guru
$GOGET golang.org/x/tools/cmd/gorename
$GOGET github.com/fatih/gomodifytags
$GOGET github.com/josharian/impl
$GOGET github.com/davidrjenni/reftools/cmd/fillstruct
$GOGET github.com/haya14busa/goplay/cmd/goplay
$GOGET github.com/godoctor/godoctor
$GOGET github.com/go-delve/delve/cmd/dlv
$GOGET github.com/stamblerre/gocode
$GOGET github.com/rogpeppe/godef
$GOGET github.com/sqs/goreturns
$GOGET golang.org/x/lint/golint
$GOGET github.com/cweill/gotests
Under Windows CMD, use set PATH=%GOROOT%\bin;%PATH%
to add Go to the path. In Windows PowerShell, use $env:GOPATH = 'C:\go'
to set the environment variable for Go.
For a Go module controlled by go.mod
one may run into problems with the versions of implicitly imported modules, which is painful in an IDE like GoLand: the code compiles, but the IDE complains about an unknown revision
and auto-completion therefore stops working.
One simple solution is to run go mod vendor
which places all imported modules under the vendor
directory; just do not check that directory into the repo.
Hexo
To extend the Markdown syntax under Hexo, register a tag function with Hexo by adding a script file like the following
(JavaScript, saved as themes/Wikitten/scripts/more-tags.js):
// Register a custom `alert` tag that renders its arguments as a button link.
// `args` is joined with spaces explicitly: concatenating the array directly
// would stringify it with commas between the arguments.
hexo.extend.tag.register('alert', function (args) {
  return '<a class="btn">' + args.join(' ') + '</a>';
});
Then you can use the tag {% alert args-for-this-tag %}
inside any Markdown file. When Hexo starts to generate the website, it first loads all scripts under scripts/
and then uses the registered function to render the tag in the Markdown file.