Depoly your flask app into Heroku Fix shit IE code manually ISBN to Book Category by Scraping DangDang A Generic Makefile for C/C++ Program Configure Raspberry pi Remove watermark with PyPDF2 tips for docker Anaconda+TensorFlow+CUDA Snippets Configure Remote Mathematica Kernel Build your own ngrok server Access Array SSL VPN 使用Rstudio制作html5幻灯片 tips for Mac OS X system Tips for ipython notebook 配置Ubuntu server + Openbox (Obuntu) tips for Vimperator tips for Vim 安装CUDA My First Jekyll Blog rsync常见选项 在Linux中读取Ipod touch的文件 tip for texmacs 在VPS上建站的一些tip Gnuplot绘图札记 Samba系统和autofs自动挂载 Linux中alsamixer声卡无法录音 搭建自己的RSS订阅器——Tiny Tiny RSS Grub2引导安装Ubuntu awk tips 将Ubuntu系统装入U盘 The Great Rtorrent 编译GCC 再这样剁手!!!该死的libgd 使用ulimit进行资源限制 使用SSH代理上IPV6 使用RCurl抓取网页数据 修复Ubuntu Grub记 openbox中的文件关联 在Ubuntu 12.04下编译qtiplot 处理BCM4312网卡驱动纪实 配置我的Ubuntu Server记 Cygwin杂记 Linux 使普通用户具有以超级权限执行脚本 让firefox自定义地处理文件类型 WordPress优秀主题及插件 在phpcloud上搭建wordpress UBUNTU下用pptpd做VPN server ubuntu升级内核过后的一些问题 安装telnet服务 kubuntu札记 64位kubuntu札记 统计软件R virtualbox stardict星际译王 Ubuntu重装windows系统后的grub引导修复 SSH服务及花生壳域名解析 采用cbp2make工具由code::blocks工程创建makefile文件 UBUNTU 札记

Snippets

2016年07月01日

Shell

Tricks

# Start dropboxd from a backgrounded subshell so it is detached from the
# current shell and keeps running as a daemon.
("$HOME"/.dropbox-dist/dropboxd &) &

# Use ImageMagick `convert` to shrink every PNG larger than 100k and center-crop
# it to 256x256, e.g. as preprocessing for machine learning.
# The output directory is created first so convert has somewhere to write.
# NOTE: {} expands to the path find reports (./sub/x.png), so PNGs located in
# subdirectories need a matching subdirectory under smaller/.
mkdir -p smaller
find . -name '*png' -size +100k -exec convert {} -resize '256x256^' -gravity Center -extent 256x256 smaller/{}.jpg \;

R

# Drive an interactive pdflatex session from R through a named pipe (FIFO).

# Setup: create the FIFO, open its read end, then start pdflatex writing to it.
system("mkfifo output.fifo")
p_out <- fifo("output.fifo", "r")
# pipe() hands the command to /bin/sh, so use the POSIX redirection
# `> file 2>&1`. The bash-only `&>` is parsed by sh/dash as "run in the
# background, then truncate the file", which leaves the FIFO without output.
p_in <- pipe("pdflatex > output.fifo 2>&1", "w")

# See what TeX said on startup
readLines(p_out)

# A second read returns character(0): TeX has nothing more to say
readLines(p_out)

# Tell TeX to do something
writeLines("\\documentclass{article}", p_in)
flush(p_in)

# See what it said in response
readLines(p_out)

# Teardown: close both connections, then remove the FIFO without shelling out
close(p_out)
close(p_in)
unlink("output.fifo")

Reinstall all packages after an R upgrade

# List the packages found in the library directory of the previous R version
package_df <- as.data.frame(
  installed.packages(lib.loc = "~/R/x86_64-pc-linux-gnu-library/3.2/")
)
package_list <- as.character(package_df[["Package"]])

# Install every one of them again into the current library
install.packages(pkgs = package_list)

Rarely used functions

# Install an old version of a package from the CRAN source archive
install.packages("https://cran.r-project.org/src/contrib/Archive/VGAM/VGAM_1.0-3.tar.gz", repos=NULL, type="source")

# Fill in missing values automatically
zoo::na.locf() 
tidyr::fill()

stringi::stri_trans_totitle() # capitalize the first letter of each word
as.Date(paste(2014, df$Week, 1, sep="-"), "%Y-%U-%u") # convert a week number to a date

# Fetch a non-exported function from a package, modify it, and put it back
# (see also `trace`).
ParamHelpers:::addOptPathEl.OptPathDF # triple colon `:::` reaches objects a package does not export
getFromNamespace("checkNamed", ns = "checkmate") # inspect the current definition

# Replace checkmate's internal checkNamed to fix the Chinese colname problem:
# for type "strict", names that make.names() leaves unchanged are accepted
# directly; everything else is delegated to checkmate::checkNames().
assignInNamespace("checkNamed", ns = "checkmate", function(x, type = "named") {
  nm <- names(x)
  # `&&` needs length-one operands (longer ones are an error since R 4.3), so
  # the element-wise NA test is reduced with anyNA(). The is.null() guard sends
  # unnamed input to checkNames() instead of producing an NA condition.
  if (type == "strict" && !is.null(nm) && !anyNA(nm) &&
      all(make.names(nm, unique = TRUE) == nm)) {
    TRUE
  } else {
    checkmate::checkNames(nm, type)
  }
})

h5::selectDataSpace 选择器函数

selectDataSpace(.Object, offset, count) 
selectDataSpace(.Object, elem)
# .Object: 待取数据集
# offset: 与数据集维度相同的vector,设定各维上的数据起点
# count: 与数据集维度相同的vector,设定各维上的从offset开始的数据长度
# elem: 与数据集维度相同列数的matrix,每一行设定单个取值的坐标

ggplot

获取12306数据

# Query a train's stop list from 12306 and show it in the data viewer.
#
# use httr package with global options to disable SSL CA
# NOTE(review): set_config() changes httr's global configuration, so peer
# verification stays off for every later httr request in this session.
# timeout_ms = 10e3 is a 10 second timeout.
httr::set_config( httr::config( ssl_verifypeer = 0L, timeout_ms = 10e3 ))
# Build the query URL, GET it, parse the JSON body and pick the nested
# data$data element before opening it with View().
paste0('https://kyfw.12306.cn/otn/czxx/queryByTrainNo?',
       'train_no=86000T660600&',
       'from_station_telecode=JGJ&',
       'to_station_telecode=WUJ&',
       'depart_date=2016-10-05') %>% 
  httr::GET() %>% httr::content(as = "text") %>%
  jsonlite::fromJSON() %>% {.$data$data} %>%
  View()
# or use RCurl package
# Same URL; here peer verification is switched off for this one call only.
# NOTE(review): the purpose of crlf = TRUE is not evident from this snippet - confirm.
paste0('https://kyfw.12306.cn/otn/czxx/queryByTrainNo?',
       'train_no=86000T660600&',
       'from_station_telecode=JGJ&',
       'to_station_telecode=WUJ&',
       'depart_date=2016-10-05') %>%
  RCurl::getURL(., .opts = list(ssl.verifypeer = FALSE), crlf = TRUE) %>% 
  jsonlite::fromJSON() %>% {.$data$data} %>%
  View()

ROracle

library(ROracle)

# Connection parameters. The host has to be a character string: a bare
# 127.0.0.1 is not a valid numeric literal and stops the script at parse time.
oracle_host <- "127.0.0.1"
oracle_port <- 550
oracle_svc <- "db"

# Assemble the Oracle connect descriptor. Base paste0() is used so that the
# snippet does not depend on stringr being attached.
connect.string <- paste0(
  "(DESCRIPTION=",
  "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=tcp)(HOST=", oracle_host, ")(PORT=", oracle_port, ")))",
  "(CONNECT_DATA=(SERVICE_NAME=", oracle_svc, ")(SERVER = DEDICATED))",
  ")"
)

drv <- dbDriver("Oracle")
con <- dbConnect(drv, username = "oper", password = "gjtestpwd", dbname = connect.string)

# Fetch the column metadata of the ALL_TAB_COLUMNS view itself and drop the
# RAW and LONG columns: these types cannot be converted to R, and selecting
# such columns raises an error. Base subset() replaces the dplyr pipeline so
# only ROracle has to be loaded.
subset(
  dbGetQuery(
    con,
    "SELECT COLUMN_NAME, DATA_TYPE FROM ALL_TAB_COLUMNS WHERE TABLE_NAME='ALL_TAB_COLUMNS'"
  ),
  !DATA_TYPE %in% c("RAW", "LONG")
)

dbDisconnect(con)

Latex

常使用的包

\usepackage{array,color} % formulas and tables
\usepackage{float} % pin a table or figure in place; use as \begin{table}[H]

临时设置页边距,通常用于将长表格手动左移

\usepackage{chngpage}

\begin {table}[H]
\caption{This is a long table}
\begin{adjustwidth}{-3cm}{} % first argument adjusts the left margin; the second (right margin) may be left empty
\end{adjustwidth}
\end{table}

多行公式并分组编号

% Two groups of equations, each numbered once: every split block shares a
% single equation number inside the surrounding align.
\begin{align}
  \begin{split}
      % The alignment tab goes in front of the relation symbol so amsmath
      % applies the correct spacing on both sides of it.
      x &\equiv u-k_{c} \cos \alpha\\
      y &\equiv s+k_{c} \sin \alpha\\
      % No trailing \\ on the last row; it would add an empty line to the group.
      \Delta' &= \frac{\Delta}{\pi}
  \end{split} \\
  %
  \begin{split}
      \frac{1}{u-x +i  ( u-x ) \tan \alpha} &= ( x+ \Delta' ) +i y
  \end{split}
\end{align}

Mathematica

Functions

(*reorder an unordered list of points into a continuous curve*)
FindCurvePath;
ListCurvePathPlot; 

(*dynamic / interactive plotting*)
DynamicModule;
Manipulate;

(*color palette, density plot, stream plot*)
colorMap = ColorData[{"SunsetColors", {min, max}}];
bar = BarLegend[{colorMap, {min,max}}];
DensityPlot[z, {x, y} \[Element] Rectangle[{x0,y0}, {x1,y1}], 
    ColorFunctionScaling -> False,
    PlotLegends -> bar, ColorFunction -> colorMap]
StreamPlot[{x',y'}, StreamStyle -> White]

(*probability functions*)
TransformedDistribution[ (x - a)/b, x \[Distributed] CauchyDistribution[0, 1]]
PDF[%, x]

(*polynomial manipulation*)
MapThread[#1^#2 &, {RandomSample[{x, y}], RandomInteger[4, 2]}]
Together@Total[1/(%-z)] // Numerator(*combine over a common denominator, keep the numerator*)
MonomialList[%^3, {x, y, z}](*split into monomials*)
Table[{i, j}, {i, 0, 4}, {j, 0, 4}] /. 
    CoefficientRules[%%^6, {x, y}] //.
    { {_, _} -> 0} // MatrixForm (*pretty-print the polynomial coefficients as a matrix*)

(*collect all symbols in an expression*)
Cases[expr, z_Symbol :> z, {0, Infinity}] // Union
(*simplify under assumptions*)
Assuming[x > 0 && y > 0, FunctionExpand[Log[x y]]]
(*substitute the parameters first, then evaluate*)
Hold@f[Range[n]] /. {f -> Total, n -> 4} // ReleaseHold


(*import data*)
SemanticImport["data.frame.dat"]//Query[Select[#x>0&]]
ReadList["vector.dat", Number]

(*functional programming*)
Through[{f, g, h}[x]]=={f[x], g[x], h[x]}

Useful packages

(*put the packages into the directory opened by the following command*)
SystemOpen@FileNameJoin[{$UserBaseDirectory, "Applications"}]

Needs["MaTeX`"](*https://github.com/szhorvat/MaTeX*)
Needs["ColorBrewer`"](*https://github.com/wanglongqi/ColorBrewer*)
Needs["ErrorBarPlots`"]

Python

# Change the Jupyter theme
pip install --upgrade jupyterthemes
jt -t onedork -cellw 90% -lineh 120 -fs 16 -nfs 16 -tfs 18 -dfs 12 -ofs 10 -mathfs 120 -N -T
## This actually rewrites `~/.jupyter/custom/custom.css`; you may need to clear the cache from Chrome's developer tools

pip install git+https://github.com/scipy/scipy.git # install a module from its source repository

pandas, numpy, sklearn, …

> x = pd.DataFrame([{'a':1, 'b':2}, {'a':3, 'b':4}]); print(x)
#   a  b
#0  1  2
#1  3  4
> x.c = x.b+1 # Pitfall: unlike R, this only sets an attribute `c` on x; it does not add a new column
> x.c[x.c>1] = np.nan; print(x)
#   a  b
#0  1  2
#1  3  4
 
> x = pd.DataFrame([{'a': None, 'b':2}, {'a':3, 'b':4}]) # Pitfall: pandas may drop NaN values when aggregating
> x.groupby(['a','b']).size().reset_index(name='counts') # count the rows in each group
#     a  b  counts
#0  3.0  4       1

> # Major pitfall: never use a mutable object as a default argument value. The wave-writing code below took an afternoon to debug: with the `= BytesIO()` default, calls that omit fd keep appending data to the same BytesIO()
> def write_wave_audio_file(fd #= BytesIO()
>         , data: bytes=b'',
>         sample_rate: int=16000, sample_size: int=2, channel: int=1):
> 
>     with wave.open(fd, 'wb') as wf:
>         wf.setnchannels(channel)
>         wf.setsampwidth(sample_size)
>         wf.setframerate(sample_rate)
>         wf.writeframes(data)
> 
>     return fd

pytorch

index_select can be correctly back-propagated

import torch
import numpy as np

np.random.seed(0)


class myLinear(torch.nn.Linear):
    # Linear layer used to demonstrate that gradients flow back through
    # torch.index_select: forward() builds the layer from a subset of the
    # weight/bias rows and returns an L1 loss instead of the activations.

    def __init__(self, *args, **kwargs):
        super(myLinear, self).__init__(*args, **kwargs)
        self.loss_func = torch.nn.L1Loss()
        # NOTE(review): torch.nn.Linear.__init__ is understood to call
        # reset_parameters() itself, so this draws the parameters a second
        # time. The call is kept so the NumPy random stream, and with it the
        # example output, stays unchanged.
        self.reset_parameters()

    def reset_parameters(self):
        # Draw the parameters from NumPy's global RNG (weight first, then
        # bias) so that np.random.seed() makes the example reproducible.
        self.weight.data = torch.tensor(np.random.rand(*self.weight.shape))
        # bias is None when the layer is constructed with bias=False.
        if self.bias is not None:
            self.bias.data = torch.tensor(np.random.rand(*self.bias.shape))

    def forward(self, input, output):
        # Compute the L1 loss between the selected output units and the
        # matching columns of `output`.
        #
        # input:  batch of feature rows fed to the linear map.
        # output: target tensor; its last dimension is indexed with the same
        #         indices as the weight rows.
        #
        # Rows 0, 3 and 5 are selected, row 0 twice. The index is created on
        # the weight's device so index_select also works off the CPU.
        index = torch.tensor([0, 3, 5, 0], dtype=torch.long,
                             device=self.weight.device)
        w = torch.index_select(self.weight, 0, index)
        b = None if self.bias is None else torch.index_select(self.bias, 0, index)

        out = torch.nn.functional.linear(input, w, b)

        return self.loss_func(out, torch.index_select(output, -1, index))


# (in_features, out_features) of the layer under test
io_shape = (10, 6)

L = myLinear(in_features=io_shape[0], out_features=io_shape[1])

# 10 random samples and 10 random target rows. Statement order matters here:
# every np.random.rand call advances the RNG seeded above.
x = torch.tensor(np.random.rand(10, io_shape[0]), requires_grad=False)
y = torch.tensor(np.random.rand(10, io_shape[1]), requires_grad=False)

# Calling the layer returns the L1 loss directly
loss = L(x, y)

loss.backward()

# Print the weight gradient rounded to 3 decimals, one row per output unit
print(np.around(L.weight.grad.data.numpy(), decimals=3))

# [[0.187 0.256 0.259 0.305 0.214 0.266 0.303 0.245 0.251 0.229]
#  [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
#  [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
#  [0.093 0.128 0.129 0.153 0.107 0.133 0.152 0.123 0.125 0.114]
#  [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
#  [0.093 0.128 0.129 0.153 0.107 0.133 0.152 0.123 0.125 0.114]]

snippets

pip install git+https://github.com/scipy/scipy.git # install a module from its source repository
pkg.__version__ # show the module version

class A(ClassB):
    # Illustrates Python's underscore naming conventions for members; the
    # string literals carry the explanation for each form.

    def __init__(self, **kwargs):
        super(A, self).__init__(**kwargs)
        # Indented with spaces like the rest of the body: mixing a tab into a
        # space-indented block is rejected by Python 3.
        self._name = '_name、_name_、_name__: 单下划线开头,建议性的私有成员,不要在外部访问。'

    def __can_not_access_outside(self):
        # Leading double underscore, no trailing double underscore.
        print('__name、 __name_ :双下划线开头,强制的私有成员,但是你依然可以蛮横地在外部危险访问。')

    def __doc__(self):
        # Leading and trailing double underscore.
        print('__name__:双下划线开头与结尾,特殊成员,与私有性质无关,例如__doc__。')


	

Html

Generate a page break when printing the web page

<p style="page-break-after:always;"></p>
or
<p><!-- pagebreak --></p>

Center-align the <h1> heading

<h1 style="text-align:center;">
or in the css file
h1{ text-align:center; }

Bash

Bash only: ANSI-C quoting ($'...') is a convenient way to mix escapes such as \t and \n with literal double quotes in one string: echo $'Name\tAge\n"Bob"\t24\nMary\t36'

SQL

Oracle PL/SQL

  • 永远不要使用小写的表名或字段名,不要使用关键字(像DAY,DATE这样的)作为字段名,会报ORA-00904: "day": invalid identifier错误或ORA-00942: table or view does not exist错误。实在要使用的话一定在SQL中用"原样包裹起来。
# connect with sqlplus
export NLS_LANG="SIMPLIFIED CHINESE_CHINA.AL32UTF8"
sqlplus $USERNAME/$PASSWORD@$HOST:$PORT/$SERVICE
SELECT * FROM ALL_TABLES -- list all tables
SELECT COLUMN_NAME, DATA_TYPE FROM ALL_TAB_COLUMNS WHERE TABLE_NAME='ALL_TAB_COLUMNS' -- list the columns of a table
SELECT * FROM ALL_CONSTRAINTS -- list all constraints

/* Every foreign-key (R) constraint together with the primary-key (P) constraint it references */
SELECT A.CONSTRAINT_NAME AS X, A.TABLE_NAME, B.CONSTRAINT_NAME 
	FROM ALL_CONSTRAINTS A, ALL_CONSTRAINTS B 
	WHERE A.CONSTRAINT_TYPE = 'R' 
		AND B.CONSTRAINT_TYPE = 'P' 
		AND A.R_CONSTRAINT_NAME = B.CONSTRAINT_NAME

SELECT * FROM TABLE_NAME WHERE ROWNUM <= 10000 -- take the first 10000 rows
SELECT * FROM TABLE_NAME SAMPLE(1) -- take a random 1% sample of the rows


-- Concatenate a column's values within each group
SELECT ID, LISTAGG(HSCODE, ',') WITHIN GROUP (ORDER BY ID) AS HS
	FROM TB
	GROUP BY ID;

NOSQL

mongoDB

// # run a mongoDB server
// $docker run -d --name mongo -p 6017:27017 -v /media/mongodb:/data/db mongo:3.6 --auth --storageEngine wiredTiger
// $sudo chcon -Rt svirt_sandbox_file_t  /media/mongodb
//
// # set it up ...
// $docker exec -it mongo mongo

// Create the root account in the admin database, then authenticate as it
use admin
db.createUser({ user: 'root', pwd: 'root_passwd', roles: [ { role: 'root', db: 'admin' } ] });
db.auth('root', 'root_passwd')
show users

// Create a regular user with the readWriteAnyDatabase role
use admin
db.createUser({ user: 'nick', pwd: 'normal_user_passwd', roles: [ { role: 'readWriteAnyDatabase', db: 'admin' } ] });

// List the databases and the collections of the current database
show dbs
show collections