Attachment 'cqp-code.txt'
Download 1 #To start:
2 $ cqp -e
3
4 #To see available corpora:
5 show corpora
6
7 #Show information about a given corpus:
8 info BNC-XML
9
10 #Activate corpus:
11 BNC-XML
12
13 #Show corpus attributes:
14 show cd
15
16 #Search for words (via regular expressions) and sort:
17 "spe(ech|aks?(ing)?)"
18 sort by word
19
20 #Set the displayed context: 8 words to the left of the match, 2 sentences to the right of the match, or 1 sentence on both sides of the match:
21 set lc 8 words
22 set rc 2s
23 set c s
24
25 #Redisplay matches:
26 cat
27
28 #Display or hide POS and lemma annotation:
29 show +pos
30 show +lemma
31 show -pos -lemma
32
33 #Search by lemma:
34 [lemma = "speak_VERB"]
35 [lemma = "speech_SUBST"]
36 [lemma = "(speak_VERB|speech_SUBST)"]
37
38 #See size of last query:
39 size Last
40
41 #Show structural attributes (shown as XML tags):
42 show +s
43
44 #Create .cqprc file with favorite settings:
45 set ProgressBar on;
46 set HistoryFile "/tmp/cqphistory.jdegen";
47 set WriteHistory yes;
48 set c s;
49
50 #Searching for POS information:
51 "work"
52 [word="work" & pos="N.*"]
53 [word="work" & pos="V.*"]
54 [word="work" & pos !="V.*"]
55
56 #Use the /codist[] macro to get frequency distributions of POS tags or lemmas for a given word (or of word forms for a given lemma):
57 /codist["work", pos]
58 /codist[lemma, "speak_VERB", word]
59
60
61 #Search for sequences, search within a context:
62 [lemma="work_VERB"][]*[word="day"]
63 [lemma="work_VERB"][]*[word="day"] within s
64 [lemma="work_VERB"][]*[word="day"] within 2 words
65 [lemma="work_VERB"][]{2}[word="day"]
66
67 #Count:
68 count by word
69 count by lemma
70
71 #Set frequency thresholds:
72 [pos="VVB" & word = "w.*"]
73 count by lemma cut 50
74
75 #Save query results:
76 Some = [word = "some" %c] [pos="NN2*"]
77 set DataDirectory "."
78 BNC-XML
79 save Some
80 cat Some > "some.txt"
81 cat Some > "| gzip > some.txt.gz"
82 sort Some by word
83
84 #Anchor points:
85 A = [pos="(AT.*|DT.*)"] @[pos="AJ.*" & word="f.*"] [pos="N.*"]
86 sort by word
87
88 #Display corpus positions of anchor points in tabular format:
89 dump A
90 dump A 10 20
91
92 #Frequency distributions:
93 group A matchend word by target word cut 100
94 group A match word by target lemma cut 100
95
96 #Reduce data randomly:
97 reduce A to 10%
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.