Commit 858d535e authored by yihua.huang's avatar yihua.huang

remove useless files

parent 2e35e149
This diff is collapsed.
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>ActiveLayerIndex</key>
<integer>0</integer>
<key>ApplicationVersion</key>
<array>
<string>com.omnigroup.OmniGrafflePro</string>
<string>139.16.0.171715</string>
</array>
<key>AutoAdjust</key>
<false/>
<key>BackgroundGraphic</key>
<dict>
<key>Bounds</key>
<string>{{0, 0}, {48, 48}}</string>
<key>Class</key>
<string>SolidGraphic</string>
<key>ID</key>
<integer>2</integer>
<key>Style</key>
<dict>
<key>shadow</key>
<dict>
<key>Draws</key>
<string>NO</string>
</dict>
<key>stroke</key>
<dict>
<key>Draws</key>
<string>NO</string>
</dict>
</dict>
</dict>
<key>BaseZoom</key>
<integer>0</integer>
<key>CanvasOrigin</key>
<string>{0, 0}</string>
<key>CanvasSize</key>
<string>{48, 48}</string>
<key>ColumnAlign</key>
<integer>1</integer>
<key>ColumnSpacing</key>
<real>36</real>
<key>CreationDate</key>
<string>2013-11-10 06:17:01 +0000</string>
<key>Creator</key>
<string>黄 亿华</string>
<key>DisplayScale</key>
<string>1 pt = 1 pt</string>
<key>GraphDocumentVersion</key>
<integer>8</integer>
<key>GraphicsList</key>
<array>
<dict>
<key>Bounds</key>
<string>{{7.5, 24}, {23, 15}}</string>
<key>Class</key>
<string>ShapedGraphic</string>
<key>FitText</key>
<string>YES</string>
<key>Flow</key>
<string>Resize</string>
<key>ID</key>
<integer>45</integer>
<key>Shape</key>
<string>Rectangle</string>
<key>Style</key>
<dict>
<key>fill</key>
<dict>
<key>Draws</key>
<string>NO</string>
</dict>
<key>shadow</key>
<dict>
<key>Draws</key>
<string>NO</string>
</dict>
<key>stroke</key>
<dict>
<key>Draws</key>
<string>NO</string>
</dict>
</dict>
<key>Text</key>
<dict>
<key>Pad</key>
<integer>0</integer>
<key>Text</key>
<string>{\rtf1\ansi\ansicpg936\cocoartf1187\cocoasubrtf400
\cocoascreenfonts1{\fonttbl\f0\fnil\fcharset0 Cochin;}
{\colortbl;\red255\green255\blue255;}
\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
\f0\fs16 \cf0 Magi
\fs24 c}</string>
<key>VerticalPad</key>
<integer>0</integer>
</dict>
<key>Wrap</key>
<string>NO</string>
</dict>
<dict>
<key>Bounds</key>
<string>{{18, 13}, {19.359630584716797, 18}}</string>
<key>Class</key>
<string>ShapedGraphic</string>
<key>FitText</key>
<string>Vertical</string>
<key>Flow</key>
<string>Resize</string>
<key>FontInfo</key>
<dict>
<key>Color</key>
<dict>
<key>w</key>
<string>0</string>
</dict>
<key>Font</key>
<string>STHeitiSC-Light</string>
<key>Size</key>
<real>6</real>
</dict>
<key>ID</key>
<integer>39</integer>
<key>Shape</key>
<string>Rectangle</string>
<key>Style</key>
<dict>
<key>fill</key>
<dict>
<key>Draws</key>
<string>NO</string>
</dict>
<key>shadow</key>
<dict>
<key>Draws</key>
<string>NO</string>
</dict>
<key>stroke</key>
<dict>
<key>Draws</key>
<string>NO</string>
</dict>
</dict>
<key>Text</key>
<dict>
<key>Text</key>
<string>{\rtf1\ansi\ansicpg936\cocoartf1187\cocoasubrtf400
\cocoascreenfonts1{\fonttbl\f0\fmodern\fcharset0 Courier-Oblique;}
{\colortbl;\red255\green255\blue255;}
\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
\f0\i\fs14 \cf0 eb}</string>
</dict>
</dict>
<dict>
<key>Class</key>
<string>LineGraphic</string>
<key>FontInfo</key>
<dict>
<key>Font</key>
<string>Helvetica</string>
<key>Size</key>
<real>13</real>
</dict>
<key>ID</key>
<integer>31</integer>
<key>Points</key>
<array>
<string>{6, 11}</string>
<string>{15, 27}</string>
<string>{14, 8}</string>
<string>{21, 26}</string>
<string>{22, 6}</string>
<string>{22, 6}</string>
</array>
<key>Style</key>
<dict>
<key>stroke</key>
<dict>
<key>HeadArrow</key>
<string>0</string>
<key>Legacy</key>
<true/>
<key>LineType</key>
<integer>1</integer>
<key>TailArrow</key>
<string>0</string>
</dict>
</dict>
</dict>
</array>
<key>GridInfo</key>
<dict>
<key>GridSpacing</key>
<real>1</real>
<key>ShowsGrid</key>
<string>YES</string>
<key>SnapsToGrid</key>
<string>YES</string>
</dict>
<key>GuidesLocked</key>
<string>NO</string>
<key>GuidesVisible</key>
<string>YES</string>
<key>HPages</key>
<integer>1</integer>
<key>ImageCounter</key>
<integer>2</integer>
<key>KeepToScale</key>
<false/>
<key>Layers</key>
<array>
<dict>
<key>Lock</key>
<string>NO</string>
<key>Name</key>
<string>图层 1</string>
<key>Print</key>
<string>YES</string>
<key>View</key>
<string>YES</string>
</dict>
</array>
<key>LayoutInfo</key>
<dict>
<key>Animate</key>
<string>NO</string>
<key>circoMinDist</key>
<real>18</real>
<key>circoSeparation</key>
<real>0.0</real>
<key>layoutEngine</key>
<string>dot</string>
<key>neatoSeparation</key>
<real>0.0</real>
<key>twopiSeparation</key>
<real>0.0</real>
</dict>
<key>LinksVisible</key>
<string>NO</string>
<key>MagnetsVisible</key>
<string>NO</string>
<key>MasterSheets</key>
<array/>
<key>ModificationDate</key>
<string>2013-11-10 06:51:47 +0000</string>
<key>Modifier</key>
<string>黄 亿华</string>
<key>NotesVisible</key>
<string>NO</string>
<key>Orientation</key>
<integer>2</integer>
<key>OriginVisible</key>
<string>NO</string>
<key>PageBreaks</key>
<string>YES</string>
<key>PrintInfo</key>
<dict>
<key>NSBottomMargin</key>
<array>
<string>float</string>
<string>41</string>
</array>
<key>NSHorizonalPagination</key>
<array>
<string>coded</string>
<string>BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG</string>
</array>
<key>NSLeftMargin</key>
<array>
<string>float</string>
<string>18</string>
</array>
<key>NSPaperSize</key>
<array>
<string>size</string>
<string>{594.99997329711914, 842}</string>
</array>
<key>NSPrintReverseOrientation</key>
<array>
<string>int</string>
<string>0</string>
</array>
<key>NSRightMargin</key>
<array>
<string>float</string>
<string>18</string>
</array>
<key>NSTopMargin</key>
<array>
<string>float</string>
<string>18</string>
</array>
</dict>
<key>PrintOnePage</key>
<false/>
<key>ReadOnly</key>
<string>NO</string>
<key>RowAlign</key>
<integer>1</integer>
<key>RowSpacing</key>
<real>36</real>
<key>SheetTitle</key>
<string>版面 1</string>
<key>SmartAlignmentGuidesActive</key>
<string>NO</string>
<key>SmartDistanceGuidesActive</key>
<string>NO</string>
<key>UniqueID</key>
<integer>1</integer>
<key>UseEntirePage</key>
<false/>
<key>VPages</key>
<integer>1</integer>
<key>WindowInfo</key>
<dict>
<key>CurrentSheet</key>
<integer>0</integer>
<key>ExpandedCanvases</key>
<array/>
<key>Frame</key>
<string>{{491, 381}, {498, 477}}</string>
<key>ListView</key>
<true/>
<key>OutlineWidth</key>
<integer>142</integer>
<key>RightSidebar</key>
<false/>
<key>Sidebar</key>
<true/>
<key>SidebarWidth</key>
<integer>116</integer>
<key>VisibleRegion</key>
<string>{{0.125, 0.125}, {47.75, 47.875}}</string>
<key>Zoom</key>
<real>8</real>
<key>ZoomValues</key>
<array>
<array>
<string>版面 1</string>
<real>8</real>
<real>1</real>
</array>
</array>
</dict>
</dict>
</plist>
This diff is collapsed.
This diff is collapsed.
<mockup version="1.0" skin="sketch" fontFace="Balsamiq Sans" measuredW="1154" measuredH="470" mockupW="709" mockupH="470">
<controls>
<control controlID="0" controlTypeID="com.balsamiq.mockups::BrowserWindow" x="445" y="0" w="709" h="470" measuredW="450" measuredH="400" zOrder="0" locked="false" isInGroup="-1">
<controlProperties>
<text>A%20Web%20Page%0Ahttp%3A//</text>
</controlProperties>
</control>
</controls>
</mockup>
\ No newline at end of file
This diff is collapsed.
<mockup version="1.0" skin="sketch" fontFace="Balsamiq Sans" measuredW="1124" measuredH="994" mockupW="701" mockupH="970">
<controls>
<control controlID="0" controlTypeID="com.balsamiq.mockups::BrowserWindow" x="423" y="24" w="701" h="970" measuredW="450" measuredH="400" zOrder="0" locked="false" isInGroup="-1">
<controlProperties>
<text>Spider%20List%20Page%0Ahttp%3A//localhost%3A8080/spider/list</text>
</controlProperties>
</control>
<control controlID="1" controlTypeID="com.balsamiq.mockups::DataGrid" x="467" y="266" w="609" h="275" measuredW="612" measuredH="111" zOrder="4" locked="false" isInGroup="-1">
<controlProperties>
<text>Spider%20%2C%20Add%20Time%20%5Ev%2CPages%20Total%20%5Ev%2C%20Pages%20Downloaded%20%5Ev%2C%20Error%20%5Ev%2C%20%20Operation%0Agithub.com%2C%202014-3-1.12%3A20%3A10%2C1221%2C%20595%2C%204%2C%20Stop%20Edit%20Delete%0Aoschina.net%2C2014-2-12.16%3A10%3A20%2C120%2C%20%20120%2C%200%2C%20Start%20Edit%20Delete%0Aappstore.com%2C2014-2-10.9%3A20%3A10%2C100000%2C100000%2C%200%2CStart%20Edit%20Delete</text>
</controlProperties>
</control>
<control controlID="7" controlTypeID="com.balsamiq.mockups::FieldSet" x="452" y="108" w="641" h="93" measuredW="200" measuredH="170" zOrder="3" locked="false" isInGroup="-1">
<controlProperties>
<text>Works</text>
</controlProperties>
</control>
<control controlID="9" controlTypeID="__group__" x="478" y="132" w="347" h="22" measuredW="347" measuredH="22" zOrder="5" locked="false" isInGroup="-1">
<groupChildrenDescriptors>
<control controlID="0" controlTypeID="com.balsamiq.mockups::CheckBox" x="55" y="0" w="73" h="22" measuredW="60" measuredH="22" zOrder="0" locked="false" isInGroup="9">
<controlProperties>
<state>selected</state>
<text>10.1.2.1</text>
</controlProperties>
</control>
<control controlID="1" controlTypeID="com.balsamiq.mockups::CheckBox" x="128" y="0" w="73" h="22" measuredW="63" measuredH="22" zOrder="1" locked="false" isInGroup="9">
<controlProperties>
<text>10.1.2.2</text>
</controlProperties>
</control>
<control controlID="2" controlTypeID="com.balsamiq.mockups::CheckBox" x="201" y="0" w="73" h="22" measuredW="63" measuredH="22" zOrder="2" locked="false" isInGroup="9">
<controlProperties>
<state>selected</state>
<text>10.1.2.3</text>
</controlProperties>
</control>
<control controlID="3" controlTypeID="com.balsamiq.mockups::CheckBox" x="274" y="0" w="73" h="22" measuredW="63" measuredH="22" zOrder="3" locked="false" isInGroup="9">
<controlProperties>
<text>10.1.2.4</text>
</controlProperties>
</control>
<control controlID="4" controlTypeID="com.balsamiq.mockups::CheckBox" x="0" y="0" w="73" h="22" measuredW="34" measuredH="22" zOrder="4" locked="false" isInGroup="9">
<controlProperties>
<text>all</text>
</controlProperties>
</control>
</groupChildrenDescriptors>
</control>
<control controlID="10" controlTypeID="__group__" x="520" y="617" w="484" h="257" measuredW="484" measuredH="257" zOrder="6" locked="false" isInGroup="-1">
<groupChildrenDescriptors>
<control controlID="0" controlTypeID="com.balsamiq.mockups::LineChart" x="75" y="46" w="409" h="175" measuredW="187" measuredH="175" zOrder="0" locked="false" isInGroup="10"/>
<control controlID="1" controlTypeID="com.balsamiq.mockups::Label" x="75" y="0" w="-1" h="-1" measuredW="60" measuredH="21" zOrder="1" locked="false" isInGroup="10">
<controlProperties>
<text>Real%20Time</text>
</controlProperties>
</control>
<control controlID="2" controlTypeID="com.balsamiq.mockups::Label" x="0" y="156" w="-1" h="-1" measuredW="36" measuredH="21" zOrder="2" locked="false" isInGroup="10">
<controlProperties>
<text>Pages</text>
</controlProperties>
</control>
<control controlID="3" controlTypeID="com.balsamiq.mockups::Label" x="277" y="236" w="-1" h="-1" measuredW="29" measuredH="21" zOrder="3" locked="false" isInGroup="10">
<controlProperties>
<text>Time</text>
</controlProperties>
</control>
</groupChildrenDescriptors>
</control>
<control controlID="11" controlTypeID="com.balsamiq.mockups::TextInput" x="889" y="228" w="115" h="-1" measuredW="65" measuredH="27" zOrder="7" locked="false" isInGroup="-1">
<controlProperties>
<text>Keyword</text>
</controlProperties>
</control>
<control controlID="13" controlTypeID="com.balsamiq.mockups::Button" x="1013" y="228" w="63" h="27" measuredW="63" measuredH="27" zOrder="8" locked="false" isInGroup="-1">
<controlProperties>
<text>Search</text>
</controlProperties>
</control>
<control controlID="16" controlTypeID="com.balsamiq.mockups::FieldSet" x="452" y="213" w="641" h="352" measuredW="200" measuredH="170" zOrder="2" locked="false" isInGroup="-1">
<controlProperties>
<text>Spiders</text>
</controlProperties>
</control>
<control controlID="17" controlTypeID="com.balsamiq.mockups::FieldSet" x="452" y="584" w="641" h="352" measuredW="200" measuredH="170" zOrder="1" locked="false" isInGroup="-1">
<controlProperties>
<text>Charts</text>
</controlProperties>
</control>
<control controlID="19" controlTypeID="com.balsamiq.mockups::DateChooser" x="530" y="229" w="-1" h="-1" measuredW="100" measuredH="25" zOrder="9" locked="false" isInGroup="-1">
<controlProperties>
<text>2014-2-1</text>
</controlProperties>
</control>
<control controlID="20" controlTypeID="com.balsamiq.mockups::DateChooser" x="665" y="229" w="-1" h="-1" measuredW="100" measuredH="25" zOrder="10" locked="false" isInGroup="-1">
<controlProperties>
<text>2014-3-1</text>
</controlProperties>
</control>
<control controlID="21" controlTypeID="com.balsamiq.mockups::Label" x="462" y="228" w="-1" h="-1" measuredW="60" measuredH="21" zOrder="11" locked="false" isInGroup="-1">
<controlProperties>
<text>Time%20from</text>
</controlProperties>
</control>
<control controlID="22" controlTypeID="com.balsamiq.mockups::Label" x="641" y="231" w="-1" h="-1" measuredW="12" measuredH="21" zOrder="12" locked="false" isInGroup="-1">
<controlProperties>
<text>to</text>
</controlProperties>
</control>
</controls>
</mockup>
\ No newline at end of file
Release Notes
----
See latest versions in [https://github.com/code4craft/webmagic/releases](https://github.com/code4craft/webmagic/releases)
*2012-9-4* `version:0.3.0`
* Change default XPath selector from HtmlCleaner to [Xsoup](https://github.com/code4craft/xsoup).
[Xsoup](https://github.com/code4craft/xsoup) is an XPath selector that I wrote on top of Jsoup. It has much better performance than HtmlCleaner.
The time to process a page is reduced from 7~9ms to 0.4ms.
If Xsoup is not stable for your use case, just call `Spider.xsoupOff()` to turn it off, and please report an issue to me!
* Add cycle retry times for Site.
When cycle retry times is set, Spider puts any URL that failed to download back into the scheduler and retries it after one cycle of the queue.
*2012-8-20* `version:0.2.1`
ComboExtractor support for annotation.
Request priority support (using `PriorityScheduler`).
Complete some I18n work (comments and documents).
More convenient extractor API:
* Add attribute name select for CSSSelector.
* Group of regex selector can be specified.
* Add OrSelector.
* Add `Selectors`; use `import static Selectors.*` for a fluent API such as:
or(regex("<title>(.*)</title>"), xpath("//title"), $("title")).select(s);
* Add JsonPathSelector for JSON parsing.
*2012-8-9* `version:0.2.0`
此次更新的主题是"方便"(之前的主题是"灵活")。
增加了webmagic-extension模块。
增加了注解方式支持,可以通过POJO+注解的方式编写一个爬虫,更符合Java开发习惯。以下是抓取一个博客的完整代码:
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
public class OschinaBlog {
@ExtractBy("//title")
private String title;
@ExtractBy(value = "div.BlogContent",type = ExtractBy.Type.Css)
private String content;
@ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
private List<String> tags;
public static void main(String[] args) {
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"),
new ConsolePageModelPipeline(), OschinaBlog.class)
.scheduler(new RedisScheduler("127.0.0.1")).thread(5).run();
}
}
增加一个Spider.test(url)方法,用于开发爬虫时进行调试。
增加基于redis的分布式支持。
增加XPath2.0语法支持(webmagic-saxon模块)。
增加基于Selenium的浏览器渲染支持,用于抓取动态加载内容(webmagic-selenium模块)。
修复了不支持https的bug。
补充了文档:[webmagic-0.2.0用户手册](http://code4craft.github.io/webmagic/)
*2012-7-25* `version:0.1.0`
第一个稳定版本。
修改了若干API,使得可扩展性更强,为每个任务分配一个ID,可以通过ID区分不同任务。
重写了Pipeline接口,将抽取结果集包装到ResultItems对象,而不是通用一个Page对象,便于逻辑分离。
增加下载的重试机制,支持gzip,支持自定义UA/cookie。
增加多线程抓取功能,只需在初始化的时候指定线程数即可。
增加jquery形式的CSS Selector API,可以通过`page.getHtml().$("div.body")`形式抽取元素。
完善了文档,架构说明:[webmagic的设计机制及原理-如何开发一个Java爬虫](http://my.oschina.net/flashsword/blog/145796),Javadoc:[http://code4craft.github.io/webmagic/docs](http://code4craft.github.io/webmagic/docs)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment