Snippets

2016年07月01日

Shell

Tricks

# Start dropboxd from a backgrounded subshell so it is not a job of the
# current shell (poor man's daemon). $HOME is quoted in case it contains spaces.
("$HOME"/.dropbox-dist/dropboxd &) &

# Shrink and center-crop every PNG larger than 100 KiB to 256x256 with
# ImageMagick `convert`, as preprocessing for further machine learning.
# Output goes to smaller/<relative path>.jpg. The target directory is created
# first, because convert fails when it is missing. `{}` is passed as a
# separate argument to sh, since embedding it in a larger word
# ("smaller/{}.jpg") is only supported by GNU find.
find . -name '*png' -size +100k -exec sh -c '
  for src do
    dest="smaller/${src#./}.jpg"
    mkdir -p "$(dirname "$dest")"
    convert "$src" -resize "256x256^" -gravity Center -extent 256x256 "$dest"
  done
' sh {} +

R

# Drive an interactive pdflatex process from R: commands are written to a
# pipe connection, and TeX's output is read back through a named pipe (FIFO).

# Setup
system("mkfifo output.fifo")
p_out <- fifo("output.fifo", "r")
# pipe() hands the command to sh, not bash. In POSIX sh `cmd &> file` means
# "run cmd in the background, then truncate file", so pdflatex would never
# write to the FIFO. Use the portable redirection for stdout + stderr.
p_in <- pipe("pdflatex > output.fifo 2>&1", "w")

# See what TeX said on startup
readLines(p_out)

readLines(p_out)
# TeX has nothing more to say, so this returns character(0)

# Tell TeX to do something
writeLines("\\documentclass{article}", p_in)
flush(p_in)

# See what it said in response
readLines(p_out)

# Tear down: close both connections, then remove the FIFO without a shell.
close(p_out)
close(p_in)
unlink("output.fifo")

Reinstall all packages after an R upgrade

# Collect the names of the packages found in the old library directory ...
old_library <- "~/R/x86_64-pc-linux-gnu-library/3.2/"
package_df <- as.data.frame(installed.packages(lib.loc = old_library))
package_list <- as.character(package_df[["Package"]])

# ... and install every one of them again
install.packages(pkgs = package_list)

rare functions

# Install an old (archived) version of a package from its source tarball
install.packages("https://cran.r-project.org/src/contrib/Archive/VGAM/VGAM_1.0-3.tar.gz", repos=NULL, type="source")

# Fill in missing values automatically
zoo::na.locf() 
tidyr::fill()

stringi::stri_trans_totitle() # capitalize the first letter
as.Date(paste(2014, df$Week, 1, sep="-"), "%Y-%U-%u") # convert a week number to a date

# Fetch a function that a package does not export, modify it, and assign it
# back into the namespace (see also `trace`).
ParamHelpers:::addOptPathEl.OptPathDF # the triple colon `:::` reaches objects a package does not export
getFromNamespace("checkNamed", ns = "checkmate") # fix the Chinese colname problem
assignInNamespace("checkNamed", ns = "checkmate", function(x, type = "named") {
  nm <- names(x)
  # `&&` needs length-one operands. The previous `!is.na(nm)` was a vector:
  # an error in R >= 4.3 for more than one name, and `if (NA)` for NULL names.
  # Reduce to scalars first; anything unusual falls through to checkmate.
  strict_names_ok <- type == "strict" &&
    !is.null(nm) &&
    !anyNA(nm) &&
    all(make.names(nm, unique = TRUE) == nm)
  if (strict_names_ok) {
    TRUE
  } else {
    checkmate::checkNames(nm, type)
  }
})

h5::selectDataSpace 选择器函数

selectDataSpace(.Object, offset, count) 
selectDataSpace(.Object, elem)
# .Object: the dataset to select from
# offset: vector with one entry per dataset dimension, giving the start position along each dimension
# count: vector with one entry per dataset dimension, giving the number of elements to take along each dimension, starting at offset
# elem: matrix with one column per dataset dimension; each row gives the coordinates of a single element to select

ggplot

获取12306数据

# Query URL shared by the two alternative download variants below
query_url <- paste0(
  'https://kyfw.12306.cn/otn/czxx/queryByTrainNo?',
  'train_no=86000T660600&',
  'from_station_telecode=JGJ&',
  'to_station_telecode=WUJ&',
  'depart_date=2016-10-05'
)

# Variant 1: httr, with global options that disable the SSL CA check
# and set a 10 000 ms timeout
httr::set_config(httr::config(ssl_verifypeer = 0L, timeout_ms = 10e3))
httr_response <- httr::GET(query_url)
httr_body <- httr::content(httr_response, as = "text")
jsonlite::fromJSON(httr_body) %>% {.$data$data} %>%
  View()

# Variant 2: RCurl, with peer verification disabled per request
rcurl_body <- RCurl::getURL(query_url, .opts = list(ssl.verifypeer = FALSE), crlf = TRUE)
jsonlite::fromJSON(rcurl_body) %>% {.$data$data} %>%
  View()

ROracle

library(ROracle)

# Connection parameters. The host must be a character string: a bare
# 127.0.0.1 is not a valid R literal and stops the script with a parse error.
oracle_host <- "127.0.0.1"
oracle_port <- 550
oracle_svc <- "db"

# Connect descriptor assembled from the parameters above. paste0() is used so
# the snippet does not depend on stringr being attached.
connect.string <- paste0(
  "(DESCRIPTION=",
  "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=", oracle_host, ")(PORT=", oracle_port, ")))",
  "(CONNECT_DATA=(SERVICE_NAME=", oracle_svc, ")(SERVER = DEDICATED))",
  ")"
)

drv <- dbDriver("Oracle")
con <- dbConnect(drv, username = "oper", password = "gjtestpwd", dbname = connect.string)

# Fetch the column names and types of the ALL_TAB_COLUMNS table itself
tab_columns <- dbGetQuery(
  con,
  "SELECT COLUMN_NAME, DATA_TYPE FROM ALL_TAB_COLUMNS WHERE TABLE_NAME='ALL_TAB_COLUMNS'"
)
# RAW and LONG columns cannot be converted to R, and SELECTing them raises an
# error, so drop them. Base subsetting is used because a bare filter() resolves
# to stats::filter unless dplyr is attached.
tab_columns[!tab_columns$DATA_TYPE %in% c("RAW", "LONG"), ]

dbDisconnect(con)

Latex

常使用的包

\usepackage{array,color} % formulas and tables
\usepackage{float} % pin a table or figure in place; use it as \begin{table}[H]

临时设置页边距,通常用于将长表格手动左移

\usepackage{chngpage}

\begin{table}[H]
  \caption{This is a long table}
  % First argument adjusts the left margin; the second (right margin) may be left empty.
  \begin{adjustwidth}{-3cm}{}
  \end{adjustwidth}
\end{table}

多行公式并分组编号

% Two numbered groups inside one align: each split block gets a single
% equation number. The alignment point & goes BEFORE the relation symbol so
% amsmath keeps the normal spacing around it. The last row of a split has no
% trailing \\, which would add an empty row.
\begin{align}
  \begin{split}
      x &\equiv u-k_{c} \cos \alpha\\
      y &\equiv s+k_{c} \sin \alpha\\
      \Delta' &= \frac{\Delta}{\pi}
  \end{split} \\
  %
  \begin{split}
      \frac{1}{u-x +i  ( u-x ) \tan \alpha} &= ( x+ \Delta' ) +i y
  \end{split}
\end{align}

Mathematica

Functions

(*reorder an unordered list of points into a continuous curve*)
FindCurvePath;
ListCurvePathPlot; 

(*dynamic plotting*)
DynamicModule;
Manipulate;

(*color palette, density plot, stream plot*)
colorMap = ColorData[{"SunsetColors", {min, max}}];
bar = BarLegend[{colorMap, {min,max}}];
DensityPlot[z, {x, y} \[Element] Rectangle[{x0,y0}, {x1,y1}], 
    ColorFunctionScaling -> False,
    PlotLegends -> bar, ColorFunction -> colorMap]
StreamPlot[{x',y'}, StreamStyle -> White]

(*probability functions*)
TransformedDistribution[ (x - a)/b, x \[Distributed] CauchyDistribution[0, 1]]
PDF[%, x]

(*polynomial manipulation*)
MapThread[#1^#2 &, {RandomSample[{x, y}], RandomInteger[4, 2]}]
Together@Total[1/(%-z)] // Numerator(*put over a common denominator*)
MonomialList[%^3, {x, y, z}](*split into monomials*)
Table[{i, j}, {i, 0, 4}, {j, 0, 4}] /. 
    CoefficientRules[%%^6, {x, y}] //.
    { {_, _} -> 0} // MatrixForm (*pretty-print the polynomial coefficients*)

(*collect all symbols*)
Cases[expr, z_Symbol :> z, {0, Infinity}] // Union
(*simplify under assumptions*)
Assuming[x > 0 && y > 0, FunctionExpand[Log[x y]]]
(*substitute the arguments first, then evaluate*)
Hold@f[Range[n]] /. {f -> Total, n -> 4} // ReleaseHold


(*import data*)
SemanticImport["data.frame.dat"]//Query[Select[#x>0&]]
ReadList["vector.dat", Number]

(*functional programming*)
Through[{f, g, h}[x]]=={f[x], g[x], h[x]}

Useful packages

(*put them into the directory opened by the following command*)
SystemOpen@FileNameJoin[{$UserBaseDirectory, "Applications"}]

Needs["MaTeX`"](*https://github.com/szhorvat/MaTeX*)
Needs["ColorBrewer`"](*https://github.com/wanglongqi/ColorBrewer*)
Needs["ErrorBarPlots`"]

Python

# Change the Jupyter theme
pip install --upgrade jupyterthemes
jt -t onedork -cellw 90% -lineh 120 -fs 16 -nfs 16 -tfs 18 -dfs 12 -ofs 10 -mathfs 120 -N -T
## This actually rewrites `~/.jupyter/custom/custom.css`; you may need to clear the cache from Chrome's developer tools

pip install git+https://github.com/scipy/scipy.git # install a module from its source repository

pandas, numpy, sklearn, …

> x = pd.DataFrame([{'a':1, 'b':2}, {'a':3, 'b':4}]); print(x)
#   a  b
#0  1  2
#1  3  4
> x.c = x.b+1 # pitfall: unlike in R, c here is only an attribute of x, not a new column
> x.c[x.c>1] = np.nan; print(x)
#   a  b
#0  1  2
#1  3  4
 

snippets

pip install git+https://github.com/scipy/scipy.git # install a module from its source repository
pkg.__version__ # show the module version

class A(ClassB):
    # Demonstrates Python's underscore naming conventions. Each string below
    # describes the convention of the member that stores or prints it.

    def __init__(self, **kwargs):
        super(A, self).__init__(**kwargs)
        # Single leading underscore: private by convention only.
        # This line was indented with a tab inside a space-indented body,
        # which Python 3 rejects with TabError. It now uses spaces.
        self._name = '_name、_name_、_name__: 单下划线开头,建议性的私有成员,不要在外部访问。'

    def __can_not_access_outside(self):
        # Double leading underscore: the name is mangled to
        # _A__can_not_access_outside, yet remains reachable under that name.
        print('__name、 __name_ :双下划线开头,强制的私有成员,但是你依然可以蛮横地在外部危险访问。')

    def __doc__(self):
        # Leading and trailing double underscores: a special member name,
        # unrelated to privacy. Defining it here replaces the class docstring
        # attribute with this method.
        print('__name__:双下划线开头与结尾,特殊成员,与私有性质无关,例如__doc__。')


	

Html

Generate a page break when printing the web page

<p style="page-break-after:always;"></p>
or
<p><!-- pagebreak --></p>

Center-align an <h1> heading

<h1 style="text-align:center;">
or in the css file
h1{ text-align:center; }

Bash

Bash only: ANSI-C quoting ($'...') is a good way to combine escapes such as \t and \n with literal double quotes in one string: echo $'Name\tAge\n"Bob"\t24\nMary\t36'

SQL

Oracle PL/SQL

  • 永远不要使用小写的表名或字段名,不要使用关键字(像DAY,DATE这样的)作为字段名,会报ORA-00904: "day": invalid identifier错误或ORA-00942: table or view does not exist错误。实在要使用的话一定在SQL中用"原样包裹起来。
# connect with sqlplus
export NLS_LANG="SIMPLIFIED CHINESE_CHINA.AL32UTF8"
sqlplus $USERNAME/$PASSWORD@$HOST:$PORT/$SERVICE
SELECT * FROM ALL_TABLES -- list all tables
SELECT COLUMN_NAME, DATA_TYPE FROM ALL_TAB_COLUMNS WHERE TABLE_NAME='ALL_TAB_COLUMNS' -- list the structure of a table
SELECT * FROM ALL_CONSTRAINTS -- list all constraints

/* Foreign-key (R) constraints, each paired with the primary-key (P) constraint it references */
SELECT A.CONSTRAINT_NAME AS X, A.TABLE_NAME, B.CONSTRAINT_NAME 
	FROM ALL_CONSTRAINTS A, ALL_CONSTRAINTS B 
	WHERE A.CONSTRAINT_TYPE = 'R' 
		AND B.CONSTRAINT_TYPE = 'P' 
		AND A.R_CONSTRAINT_NAME = B.CONSTRAINT_NAME

SELECT * FROM TABLE_NAME WHERE ROWNUM <= 10000 -- take the first 10000 rows
SELECT * FROM TABLE_NAME SAMPLE(1) -- take a random 1% sample of the rows


-- Concatenate the HSCODE values of each ID group into one comma-separated string.
-- The values are ordered by HSCODE: ID is constant inside a group, so ordering
-- by ID left the order of the concatenated values undefined.
SELECT ID, LISTAGG(HSCODE, ',') WITHIN GROUP (ORDER BY HSCODE) AS HS
	FROM TB
	GROUP BY ID;

NOSQL

mongoDB

// # run a mongoDB server
// $docker run -d --name mongo -p 6017:27017 -v /media/mongodb:/data/db mongo:3.6 --auth --storageEngine wiredTiger
// $sudo chcon -Rt svirt_sandbox_file_t  /media/mongodb
//
// # set it up ...
// $docker exec -it mongo mongo

// Create a user 'root' holding the 'root' role on the admin database,
// authenticate as that user, then list the users.
use admin
db.createUser({ user: 'root', pwd: 'root_passwd', roles: [ { role: 'root', db: 'admin' } ] });
db.auth('root', 'root_passwd')
show users

// Create a second user 'nick' with the 'readWriteAnyDatabase' role.
use admin
db.createUser({ user: 'nick', pwd: 'normal_user_passwd', roles: [ { role: 'readWriteAnyDatabase', db: 'admin' } ] });

// List the databases, and the collections of the current database.
show dbs
show collections